Unverified Commit 5d8330cc authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Test] Add model speed and accuracy tests for RGCN (#2458)

* add rgcn acc bench

* fix issue of multiple parametrize

* model acc/speed test: RGCN

* fix dep problem in docker run

* add docstring
parent f8b3ebce
......@@ -27,7 +27,7 @@
],
// List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial).
"branches": ["master", "0.5.0", "0.5.2", "0.5.3", "0.4.3.post2"], // for git
"branches": ["HEAD"], // for git
// The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as
......
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class RGCN(nn.Module):
    """R-GCN entity classifier built from basis-decomposition ``RelGraphConv`` layers.

    Layer stack: one input-to-hidden layer, ``num_hidden_layers`` hidden-to-hidden
    layers, and one hidden-to-output layer. All layers use the "basis"
    regularizer and low-memory message passing; only the output layer has no
    activation (it produces raw logits).
    """

    def __init__(self,
                 num_nodes,
                 n_hidden,
                 num_classes,
                 num_rels,
                 num_bases,
                 num_hidden_layers,
                 dropout):
        super(RGCN, self).__init__()
        convs = nn.ModuleList()
        # Input layer: nodes are featureless, so the input width is the node
        # count (features are node ids embedded by the layer).
        convs.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
                                  num_bases, activation=F.relu,
                                  dropout=dropout, low_mem=True))
        # Hidden layers (may be zero of them).
        for _ in range(num_hidden_layers):
            convs.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
                                      num_bases, activation=F.relu,
                                      dropout=dropout, low_mem=True))
        # Output layer: raw class logits, no activation.
        convs.append(RelGraphConv(n_hidden, num_classes, num_rels, "basis",
                                  num_bases, activation=None, low_mem=True))
        self.layers = convs

    def forward(self, g, h, r, norm):
        """Apply every layer in order; ``r`` are edge types, ``norm`` edge norms."""
        out = h
        for conv in self.layers:
            out = conv(g, out, r, norm)
        return out
def evaluate(model, g, feats, edge_type, edge_norm, labels, idx):
    """Return the classification accuracy (0-100) of ``model`` on nodes ``idx``.

    ``labels`` must already be restricted to the same nodes as ``idx``.
    Runs in eval mode without building the autograd graph.
    """
    model.eval()
    with torch.no_grad():
        scores = model(g, feats, edge_type, edge_norm)[idx]
        predictions = scores.argmax(dim=1)
        num_correct = (predictions == labels).sum().item()
    return 100.0 * num_correct / len(labels)
@utils.benchmark('acc')
@utils.parametrize('data', ['aifb', 'mutag'])
def track_acc(data):
    """Train a 2-layer R-GCN for RDF entity classification; return test accuracy.

    Benchmarked on the AIFB and MUTAG datasets. The returned value is a
    percentage in [0, 100], as required by ``utils.benchmark('acc')``.
    """
    # args: per-dataset hyper-parameters — number of bases for the basis
    # decomposition (-1 presumably means "use num_rels bases" inside
    # RelGraphConv — TODO confirm) and the L2 weight-decay coefficient.
    if data == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif data == 'mutag':
        num_bases = 30
        l2norm = 5e-4
    elif data == 'am':
        # NOTE(review): 'am' is not in the parametrize list above, so this
        # branch is currently unreachable from the benchmark harness.
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError()

    data = utils.process_data(data)
    device = utils.get_bench_device()

    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    num_classes = data.num_classes
    # pop() removes masks/labels from the heterograph so they are not dragged
    # along into the homogeneous graph built below.
    train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
    test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
    labels = g.nodes[category].data.pop('labels').to(device)

    # calculate norm for each edge type and store in edge
    for canonical_etype in g.canonical_etypes:
        u, v, eid = g.all_edges(form='all', etype=canonical_etype)
        # Per-edge norm = 1 / in-degree of the destination node (within this
        # edge type); unique+inverse maps each edge's dest to its count.
        _, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
        degrees = count[inverse_index]
        norm = 1. / degrees.float()
        norm = norm.unsqueeze(1)
        g.edges[canonical_etype].data['norm'] = norm

    # get target category id (index of the predicted node type in g.ntypes)
    category_id = len(g.ntypes)
    for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i

    # Flatten to a homogeneous graph; node/edge type ids survive in
    # ndata[dgl.NTYPE] / edata[dgl.ETYPE], and 'norm' is carried over.
    g = dgl.to_homogeneous(g, edata=['norm']).to(device)
    num_nodes = g.number_of_nodes()
    edge_norm = g.edata['norm']
    edge_type = g.edata[dgl.ETYPE].long()

    # find out the target node ids in g
    target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
    train_idx = target_idx[train_mask]
    test_idx = target_idx[test_mask]
    train_labels = labels[train_mask]
    test_labels = labels[test_mask]

    # since the nodes are featureless, the input feature is then the node id.
    feats = torch.arange(num_nodes, device=device)

    # create model: input + output RelGraphConv only (0 extra hidden layers),
    # hidden size 16, no dropout.
    model = RGCN(num_nodes,
                 16,
                 num_classes,
                 num_rels,
                 num_bases,
                 0,
                 0).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=l2norm)

    # Full-graph training for a fixed 30 epochs.
    model.train()
    for epoch in range(30):
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    acc = evaluate(model, g, feats, edge_type, edge_norm, test_labels, test_idx)
    return acc
......@@ -98,4 +98,4 @@ def track_time(data):
optimizer.step()
t1 = time.time()
return t1 - t0
return (t1 - t0) / num_epochs
import time
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class RGCN(nn.Module):
    """R-GCN entity classifier: a stack of basis-decomposition ``RelGraphConv``
    layers (input-to-hidden, optional hidden-to-hidden, hidden-to-output).

    Parameters
    ----------
    num_nodes : int
        Number of nodes; also the input feature width (nodes are featureless,
        so node ids serve as input features).
    n_hidden : int
        Hidden layer width.
    num_classes : int
        Number of output classes (output layer width).
    num_rels : int
        Number of relation (edge) types.
    num_bases : int
        Number of bases for the "basis" weight regularizer.
    num_hidden_layers : int
        Number of extra hidden-to-hidden layers (may be 0).
    dropout : float
        Dropout rate applied in the input and hidden layers.
    """
    def __init__(self,
                 num_nodes,
                 n_hidden,
                 num_classes,
                 num_rels,
                 num_bases,
                 num_hidden_layers,
                 dropout):
        super(RGCN, self).__init__()
        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
                                        num_bases, activation=F.relu, dropout=dropout,
                                        low_mem=True))
        # h2h
        for i in range(num_hidden_layers):
            self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
                                            num_bases, activation=F.relu, dropout=dropout,
                                            low_mem=True))
        # o2h: output layer emits raw logits (no activation, no dropout)
        self.layers.append(RelGraphConv(n_hidden, num_classes, num_rels, "basis",
                                        num_bases, activation=None, low_mem=True))

    def forward(self, g, h, r, norm):
        """Apply each layer in sequence; ``r`` are edge types, ``norm`` edge norms."""
        for layer in self.layers:
            h = layer(g, h, r, norm)
        return h
@utils.benchmark('time', 3600)
@utils.parametrize('data', ['aifb', 'am'])
def track_time(data):
    """Measure average per-epoch training time of a 2-layer R-GCN.

    Trains full-graph for ``num_epochs`` epochs on AIFB or AM and returns the
    mean wall-clock seconds per epoch (unit required by
    ``utils.benchmark('time')``).
    """
    # args: per-dataset hyper-parameters (basis count and L2 weight decay).
    if data == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif data == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError()

    data = utils.process_data(data)
    device = utils.get_bench_device()
    num_epochs = 30

    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    num_classes = data.num_classes
    # pop() strips masks/labels off the heterograph before homogenization.
    train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
    test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
    labels = g.nodes[category].data.pop('labels').to(device)

    # calculate norm for each edge type and store in edge
    for canonical_etype in g.canonical_etypes:
        u, v, eid = g.all_edges(form='all', etype=canonical_etype)
        # Per-edge norm = 1 / in-degree of the destination (per edge type).
        _, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
        degrees = count[inverse_index]
        norm = 1. / degrees.float()
        norm = norm.unsqueeze(1)
        g.edges[canonical_etype].data['norm'] = norm

    # get target category id (index of the predicted node type in g.ntypes)
    category_id = len(g.ntypes)
    for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i

    # Flatten to a homogeneous graph; types survive as dgl.NTYPE / dgl.ETYPE.
    g = dgl.to_homogeneous(g, edata=['norm']).to(device)
    num_nodes = g.number_of_nodes()
    edge_norm = g.edata['norm']
    edge_type = g.edata[dgl.ETYPE].long()

    # find out the target node ids in g
    target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
    train_idx = target_idx[train_mask]
    test_idx = target_idx[test_mask]
    train_labels = labels[train_mask]
    test_labels = labels[test_mask]

    # since the nodes are featureless, the input feature is then the node id.
    feats = torch.arange(num_nodes, device=device)

    # create model: input + output layer only, hidden size 16, no dropout.
    model = RGCN(num_nodes,
                 16,
                 num_classes,
                 num_rels,
                 num_bases,
                 0,
                 0).to(device)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=l2norm)

    # Time the whole training loop and average over epochs.
    model.train()
    t0 = time.time()
    for epoch in range(num_epochs):
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    t1 = time.time()

    return (t1 - t0) / num_epochs
......@@ -89,4 +89,4 @@ def track_time(data):
optimizer.step()
t1 = time.time()
return t1 - t0
return (t1 - t0) / num_epochs
......@@ -130,4 +130,4 @@ def track_time(data):
t1 = time.time()
return t1 - t0
return (t1 - t0) / num_epochs
......@@ -180,4 +180,4 @@ def track_time(data):
t1 = time.time()
return t1 - t0
return (t1 - t0) / num_epochs
import os
import shutil, zipfile
import requests
import inspect
import numpy as np
import pandas
import dgl
......@@ -37,7 +38,7 @@ def get_graph(name):
print(name + " doesn't exist")
return None
class ogb_data(object):
class OGBDataset(object):
def __init__(self, g, num_labels):
self._g = g
self._num_labels = num_labels
......@@ -81,13 +82,21 @@ def load_ogb_product(name):
graph.ndata['val_mask'] = val_mask
graph.ndata['test_mask'] = test_mask
return ogb_data(graph, num_labels)
return OGBDataset(graph, num_labels)
def process_data(name):
if name == 'cora':
return dgl.data.CoraGraphDataset()
elif name == 'pubmed':
return dgl.data.PubmedGraphDataset()
elif name == 'aifb':
return dgl.data.AIFBDataset()
elif name == 'mutag':
return dgl.data.MUTAGDataset()
elif name == 'bgs':
return dgl.data.BGSDataset()
elif name == 'am':
return dgl.data.AMDataset()
elif name == 'reddit':
return dgl.data.RedditDataset(self_loop=True)
elif name == 'ogbn-products':
......@@ -119,17 +128,88 @@ TRACK_SETUP = {
}
def parametrize(param_name, params):
    """Decorator for benchmarking over a set of parameters.

    Parameters
    ----------
    param_name : str
        Parameter name. Must be one of the arguments of the decorated function.
    params : list[any]
        List of values to benchmark for the given parameter name. Recommend
        to use Python's native object type (e.g., int, str, list[int]) because
        ASV will display them on the plot.

    Examples
    --------
    Benchmark function `foo` when argument `x` is equal to 10 or 20.

    .. code::

        @benchmark('time')
        @parametrize('x', [10, 20])
        def foo(x):
            pass

    Benchmark function with multiple parametrizations. It will run the function
    with all possible combinations. The example below generates 6 benchmarks.

    .. code::

        @benchmark('time')
        @parametrize('x', [10, 20])
        @parametrize('y', [-1, -2, -3])
        def foo(x, y):
            pass

    When using multiple parametrizations, it can have arbitrary order. The example
    below is the same as the above one.

    .. code::

        @benchmark('time')
        @parametrize('y', [-1, -2, -3])
        @parametrize('x', [10, 20])
        def foo(x, y):
            pass
    """
    def _wrapper(func):
        sig_params = inspect.signature(func).parameters.keys()
        num_params = len(sig_params)
        # Allocate one slot per function argument so that values are stored
        # POSITIONALLY (by argument index), not in decorator-application
        # order. This is what makes multiple @parametrize decorators
        # order-independent: appending instead would pair ASV's params with
        # the wrong argument when decorators are stacked out of order.
        if getattr(func, 'params', None) is None:
            func.params = [None] * num_params
        if getattr(func, 'param_names', None) is None:
            func.param_names = [None] * num_params
        found_param = False
        for i, sig_param in enumerate(sig_params):
            if sig_param == param_name:
                func.params[i] = params
                func.param_names[i] = param_name
                found_param = True
                break
        if not found_param:
            raise ValueError('Invalid parameter name:', param_name)
        return func
    return _wrapper
def benchmark(track_type, timeout=60):
"""Decorator for indicating the benchmark type.
Parameters
----------
track_type : str
Type. Must be either:
- 'time' : For timing. Unit: second.
- 'acc' : For accuracy. Unit: percentage, value between 0 and 100.
timeout : int
Timeout threshold in second.
Examples
--------
.. code::
@benchmark('time')
def foo():
pass
"""
assert track_type in ['time', 'acc']
def _wrapper(func):
func.unit = TRACK_UNITS[track_type]
......
......@@ -5,7 +5,7 @@ set -e
. /opt/conda/etc/profile.d/conda.sh
pip install -r /asv/torch_gpu_pip.txt
pip install pandas
pip install pandas rdflib
# install
pushd python
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment