Unverified Commit 5d8330cc authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Test] Add model speed and accuracy tests for RGCN (#2458)

* add rgcn acc bench

* fix issue of multiple parametrize

* model acc/speed test: RGCN

* fix dep problem in docker run

* add docstring
parent f8b3ebce
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
], ],
// List of branches to benchmark. If not provided, defaults to "master" // List of branches to benchmark. If not provided, defaults to "master"
// (for git) or "default" (for mercurial). // (for git) or "default" (for mercurial).
"branches": ["master", "0.5.0", "0.5.2", "0.5.3", "0.4.3.post2"], // for git "branches": ["HEAD"], // for git
// The DVCS being used. If not set, it will be automatically // The DVCS being used. If not set, it will be automatically
// determined from "repo" by looking at the protocol in the URL // determined from "repo" by looking at the protocol in the URL
// (if remote), or by looking for special directories, such as // (if remote), or by looking for special directories, such as
......
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class RGCN(nn.Module):
    """R-GCN entity classifier built from stacked ``RelGraphConv`` layers
    (basis decomposition, low-memory mode): one input layer, an optional
    number of hidden layers, and one output layer producing class logits."""

    def __init__(self,
                 num_nodes,
                 n_hidden,
                 num_classes,
                 num_rels,
                 num_bases,
                 num_hidden_layers,
                 dropout):
        super(RGCN, self).__init__()
        self.layers = nn.ModuleList()
        # Input-to-hidden layer. Nodes are featureless, so the input width
        # is num_nodes (node ids are fed as features downstream).
        self.layers.append(
            RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
                         num_bases, activation=F.relu, dropout=dropout,
                         low_mem=True))
        # Hidden-to-hidden layers.
        for _ in range(num_hidden_layers):
            self.layers.append(
                RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
                             num_bases, activation=F.relu, dropout=dropout,
                             low_mem=True))
        # Hidden-to-output layer: no activation, raw logits.
        self.layers.append(
            RelGraphConv(n_hidden, num_classes, num_rels, "basis",
                         num_bases, activation=None, low_mem=True))

    def forward(self, g, h, r, norm):
        """Apply every layer in sequence; ``h`` are node features, ``r`` the
        per-edge relation types, ``norm`` the per-edge normalizers."""
        for conv in self.layers:
            h = conv(g, h, r, norm)
        return h
def evaluate(model, g, feats, edge_type, edge_norm, labels, idx):
    """Return the classification accuracy (in percent) of ``model`` on the
    nodes selected by ``idx``; ``labels`` must align element-wise with ``idx``."""
    model.eval()  # switch off dropout for deterministic predictions
    with torch.no_grad():
        scores = model(g, feats, edge_type, edge_norm)[idx]
    predictions = scores.argmax(dim=1)
    num_correct = (predictions == labels).sum()
    return num_correct.item() * 1.0 / len(labels) * 100
@utils.benchmark('acc')
@utils.parametrize('data', ['aifb', 'mutag'])
def track_acc(data):
    """Train a two-layer R-GCN entity classifier for 30 epochs on an RDF
    dataset and return the test-set accuracy in percent."""
    # Per-dataset hyperparameters. num_bases == -1 disables basis
    # decomposition (one basis per relation); l2norm is the Adam weight decay.
    if data == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif data == 'mutag':
        num_bases = 30
        l2norm = 5e-4
    elif data == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    data = utils.process_data(data)
    device = utils.get_bench_device()

    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    num_classes = data.num_classes
    train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
    test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
    labels = g.nodes[category].data.pop('labels').to(device)

    # Calculate the normalizer 1/deg(dst) for each edge and store it as
    # edge data, separately per edge type.
    for canonical_etype in g.canonical_etypes:
        _, v, _ = g.all_edges(form='all', etype=canonical_etype)
        _, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
        degrees = count[inverse_index]
        norm = (1. / degrees.float()).unsqueeze(1)
        g.edges[canonical_etype].data['norm'] = norm

    # Numeric id of the target node type in the heterogeneous graph.
    category_id = len(g.ntypes)
    for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i
            break  # node types are unique; stop at the first match

    g = dgl.to_homogeneous(g, edata=['norm']).to(device)
    num_nodes = g.number_of_nodes()
    edge_norm = g.edata['norm']
    edge_type = g.edata[dgl.ETYPE].long()

    # Positions of the target-category nodes inside the homogeneous graph.
    target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
    train_idx = target_idx[train_mask]
    test_idx = target_idx[test_mask]
    train_labels = labels[train_mask]
    test_labels = labels[test_mask]

    # Since the nodes are featureless, the input feature is the node id,
    # turning the first layer into an embedding lookup.
    feats = torch.arange(num_nodes, device=device)

    model = RGCN(num_nodes,
                 16,            # hidden size
                 num_classes,
                 num_rels,
                 num_bases,
                 0,             # no extra hidden layers
                 0).to(device)  # no dropout
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=l2norm)

    model.train()
    for epoch in range(30):
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return evaluate(model, g, feats, edge_type, edge_norm, test_labels, test_idx)
...@@ -98,4 +98,4 @@ def track_time(data): ...@@ -98,4 +98,4 @@ def track_time(data):
optimizer.step() optimizer.step()
t1 = time.time() t1 = time.time()
return t1 - t0 return (t1 - t0) / num_epochs
import time
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from .. import utils
class RGCN(nn.Module):
    """R-GCN model for entity classification: a stack of ``RelGraphConv``
    layers with basis decomposition in low-memory mode — input layer,
    ``num_hidden_layers`` hidden layers, and an output layer of logits."""

    def __init__(self,
                 num_nodes,
                 n_hidden,
                 num_classes,
                 num_rels,
                 num_bases,
                 num_hidden_layers,
                 dropout):
        super(RGCN, self).__init__()
        self.layers = nn.ModuleList()
        # First layer maps the num_nodes-wide (featureless, id-based) input
        # down to the hidden size.
        self.layers.append(
            RelGraphConv(num_nodes, n_hidden, num_rels, "basis", num_bases,
                         activation=F.relu, dropout=dropout, low_mem=True))
        # Optional intermediate layers at constant hidden width.
        for _ in range(num_hidden_layers):
            self.layers.append(
                RelGraphConv(n_hidden, n_hidden, num_rels, "basis", num_bases,
                             activation=F.relu, dropout=dropout, low_mem=True))
        # Final layer emits unnormalized class scores (no activation).
        self.layers.append(
            RelGraphConv(n_hidden, num_classes, num_rels, "basis", num_bases,
                         activation=None, low_mem=True))

    def forward(self, g, h, r, norm):
        """Propagate ``h`` through every layer given edge types ``r`` and
        per-edge normalizers ``norm``; return the final node representations."""
        for layer in self.layers:
            h = layer(g, h, r, norm)
        return h
@utils.benchmark('time', 3600)
@utils.parametrize('data', ['aifb', 'am'])
def track_time(data):
    """Train a two-layer R-GCN entity classifier on an RDF dataset and
    return the average per-epoch training time in seconds."""
    # Per-dataset hyperparameters. num_bases == -1 disables basis
    # decomposition (one basis per relation); l2norm is the Adam weight decay.
    if data == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif data == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError('Unknown dataset: {}'.format(data))

    data = utils.process_data(data)
    device = utils.get_bench_device()
    num_epochs = 30

    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    num_classes = data.num_classes
    train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
    # The test mask is popped so preprocessing matches the accuracy
    # benchmark, but it is not needed for timing training epochs.
    g.nodes[category].data.pop('test_mask')
    labels = g.nodes[category].data.pop('labels').to(device)

    # Calculate the normalizer 1/deg(dst) for each edge and store it as
    # edge data, separately per edge type.
    for canonical_etype in g.canonical_etypes:
        _, v, _ = g.all_edges(form='all', etype=canonical_etype)
        _, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
        degrees = count[inverse_index]
        norm = (1. / degrees.float()).unsqueeze(1)
        g.edges[canonical_etype].data['norm'] = norm

    # Numeric id of the target node type in the heterogeneous graph.
    category_id = len(g.ntypes)
    for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i
            break  # node types are unique; stop at the first match

    g = dgl.to_homogeneous(g, edata=['norm']).to(device)
    num_nodes = g.number_of_nodes()
    edge_norm = g.edata['norm']
    edge_type = g.edata[dgl.ETYPE].long()

    # Positions of the target-category nodes inside the homogeneous graph.
    target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
    train_idx = target_idx[train_mask]
    train_labels = labels[train_mask]

    # Since the nodes are featureless, the input feature is the node id,
    # turning the first layer into an embedding lookup.
    feats = torch.arange(num_nodes, device=device)

    model = RGCN(num_nodes,
                 16,            # hidden size
                 num_classes,
                 num_rels,
                 num_bases,
                 0,             # no extra hidden layers
                 0).to(device)  # no dropout
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=l2norm)

    model.train()
    t0 = time.time()
    for epoch in range(num_epochs):
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    t1 = time.time()

    return (t1 - t0) / num_epochs
...@@ -89,4 +89,4 @@ def track_time(data): ...@@ -89,4 +89,4 @@ def track_time(data):
optimizer.step() optimizer.step()
t1 = time.time() t1 = time.time()
return t1 - t0 return (t1 - t0) / num_epochs
...@@ -130,4 +130,4 @@ def track_time(data): ...@@ -130,4 +130,4 @@ def track_time(data):
t1 = time.time() t1 = time.time()
return t1 - t0 return (t1 - t0) / num_epochs
...@@ -180,4 +180,4 @@ def track_time(data): ...@@ -180,4 +180,4 @@ def track_time(data):
t1 = time.time() t1 = time.time()
return t1 - t0 return (t1 - t0) / num_epochs
import os import os
import shutil, zipfile import shutil, zipfile
import requests import requests
import inspect
import numpy as np import numpy as np
import pandas import pandas
import dgl import dgl
...@@ -37,7 +38,7 @@ def get_graph(name): ...@@ -37,7 +38,7 @@ def get_graph(name):
print(name + " doesn't exist") print(name + " doesn't exist")
return None return None
class ogb_data(object): class OGBDataset(object):
def __init__(self, g, num_labels): def __init__(self, g, num_labels):
self._g = g self._g = g
self._num_labels = num_labels self._num_labels = num_labels
...@@ -81,13 +82,21 @@ def load_ogb_product(name): ...@@ -81,13 +82,21 @@ def load_ogb_product(name):
graph.ndata['val_mask'] = val_mask graph.ndata['val_mask'] = val_mask
graph.ndata['test_mask'] = test_mask graph.ndata['test_mask'] = test_mask
return ogb_data(graph, num_labels) return OGBDataset(graph, num_labels)
def process_data(name): def process_data(name):
if name == 'cora': if name == 'cora':
return dgl.data.CoraGraphDataset() return dgl.data.CoraGraphDataset()
elif name == 'pubmed': elif name == 'pubmed':
return dgl.data.PubmedGraphDataset() return dgl.data.PubmedGraphDataset()
elif name == 'aifb':
return dgl.data.AIFBDataset()
elif name == 'mutag':
return dgl.data.MUTAGDataset()
elif name == 'bgs':
return dgl.data.BGSDataset()
elif name == 'am':
return dgl.data.AMDataset()
elif name == 'reddit': elif name == 'reddit':
return dgl.data.RedditDataset(self_loop=True) return dgl.data.RedditDataset(self_loop=True)
elif name == 'ogbn-products': elif name == 'ogbn-products':
...@@ -119,17 +128,88 @@ TRACK_SETUP = { ...@@ -119,17 +128,88 @@ TRACK_SETUP = {
} }
def parametrize(param_name, params): def parametrize(param_name, params):
"""Decorator for benchmarking over a set of parameters.
Parameters
----------
param_name : str
Parameter name. Must be one of the arguments of the decorated function.
params : list[any]
List of values to benchmark for the given parameter name. Recommend
to use Python's native object type (e.g., int, str, list[int]) because
ASV will display them on the plot.
Examples
--------
Benchmark function `foo` when argument `x` is equal to 10 or 20.
.. code::
@benchmark('time')
@parametrize('x', [10, 20])
def foo(x):
pass
Benchmark function with multiple parametrizations. It will run the function
with all possible combinations. The example below generates 6 benchmarks.
.. code::
@benchmark('time')
@parametrize('x', [10, 20])
@parametrize('y', [-1, -2, -3])
def foo(x, y):
pass
When using multiple parametrizations, it can have arbitrary order. The example
below is the same as the above one.
.. code::
@benchmark('time')
@parametrize('y', [-1, -2, -3])
@parametrize('x', [10, 20])
def foo(x, y):
pass
"""
def _wrapper(func): def _wrapper(func):
sig_params = inspect.signature(func).parameters.keys()
num_params = len(sig_params)
if getattr(func, 'params', None) is None: if getattr(func, 'params', None) is None:
func.params = [] func.params = [None] * num_params
func.params.append(params)
if getattr(func, 'param_names', None) is None: if getattr(func, 'param_names', None) is None:
func.param_names = [] func.param_names = [None] * num_params
func.param_names.append(param_name) found_param = False
for i, sig_param in enumerate(sig_params):
if sig_param == param_name:
func.params[i] = params
func.param_names[i] = param_name
found_param = True
break
if not found_param:
raise ValueError('Invalid parameter name:', param_name)
return func return func
return _wrapper return _wrapper
def benchmark(track_type, timeout=60): def benchmark(track_type, timeout=60):
"""Decorator for indicating the benchmark type.
Parameters
----------
track_type : str
Type. Must be either:
- 'time' : For timing. Unit: second.
- 'acc' : For accuracy. Unit: percentage, value between 0 and 100.
timeout : int
Timeout threshold in second.
Examples
--------
.. code::
@benchmark('time')
def foo():
pass
"""
assert track_type in ['time', 'acc'] assert track_type in ['time', 'acc']
def _wrapper(func): def _wrapper(func):
func.unit = TRACK_UNITS[track_type] func.unit = TRACK_UNITS[track_type]
......
...@@ -5,7 +5,7 @@ set -e ...@@ -5,7 +5,7 @@ set -e
. /opt/conda/etc/profile.d/conda.sh . /opt/conda/etc/profile.d/conda.sh
pip install -r /asv/torch_gpu_pip.txt pip install -r /asv/torch_gpu_pip.txt
pip install pandas pip install pandas rdflib
# install # install
pushd python pushd python
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment