Unverified commit af61e2fb authored by Zihao Ye, committed by GitHub

[Feature] Support nn modules for bipartite graphs. (#1392)



* init gat

* fix

* gin

* 7 nn modules

* rename & lint

* upd

* upd

* fix lint

* upd test

* upd

* lint

* shape check

* upd

* lint

* address comments

* update tensorflow
Co-authored-by: Quan Gan <coin2028@hotmail.com>
Co-authored-by: Jinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 67cb7a43
......@@ -3992,6 +3992,10 @@ class DGLGraph(DGLBaseGraph):
self._node_frame = old_nframe
self._edge_frame = old_eframe
def is_homograph(self):
"""Return if the graph is homogeneous."""
return True
############################################################
# Batch/Unbatch APIs
############################################################
......
......@@ -4106,6 +4106,10 @@ class DGLHeteroGraph(object):
self._node_frames = old_nframes
self._edge_frames = old_eframes
def is_homograph(self):
"""Return if the graph is homogeneous."""
return len(self.ntypes) == 1 and len(self.etypes) == 1
############################################################
# Internal APIs
############################################################
......
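For reference, a hedged sketch of what the new predicate returns on a homogeneous versus a uni-bipartite graph; the dgl.heterograph call and the type names are illustrative (DGL 0.4-era edge-list constructor), not part of this diff.

import dgl

# One node type and one edge type: homogeneous.
follows = dgl.heterograph({('user', 'follows', 'user'): [(0, 1), (1, 2)]})
print(follows.is_homograph())   # True

# Two node types ('user' and 'game'): not homogeneous.
plays = dgl.heterograph({('user', 'plays', 'game'): [(0, 0), (1, 1)]})
print(plays.is_homograph())     # False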
......@@ -6,6 +6,8 @@ from mxnet.gluon import nn
from .... import function as fn
from ..softmax import edge_softmax
from ..utils import normalize
from ....utils import expand_as_pair
class AGNNConv(nn.Block):
r"""Attention-based Graph Neural Network layer from paper `Attention-based
......@@ -47,6 +49,9 @@ class AGNNConv(nn.Block):
feat : mxnet.NDArray or pair of mxnet.NDArray
The input feature of shape :math:`(N, *)` where :math:`N` is the
number of nodes and :math:`*` could be of any shape.
If a pair of mxnet.NDArray is given, the pair must contain two tensors of shape
:math:`(N_{in}, *)` and :math:`(N_{out}, *)`, and the :math:`*` in the latter
tensor must equal that in the former.
Returns
-------
......@@ -55,12 +60,16 @@ class AGNNConv(nn.Block):
should be the same as input shape.
"""
graph = graph.local_var()
graph.ndata['h'] = feat
graph.ndata['norm_h'] = normalize(feat, p=2, axis=-1)
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.srcdata['norm_h'] = normalize(feat_src, p=2, axis=-1)
if isinstance(feat, tuple):
graph.dstdata['norm_h'] = normalize(feat_dst, p=2, axis=-1)
# compute cosine distance
graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
cos = graph.edata.pop('cos')
e = self.beta.data(feat.context) * cos
e = self.beta.data(feat_src.context) * cos
graph.edata['p'] = edge_softmax(graph, e)
graph.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
return graph.ndata.pop('h')
return graph.dstdata.pop('h')
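Several of the forwards in this diff call expand_as_pair; a minimal sketch of its assumed behavior (not the actual dgl.utils implementation):

def expand_as_pair(input_):
    # A single tensor stands for both the source and the destination
    # features; a (src, dst) tuple is passed through unchanged.
    if isinstance(input_, tuple):
        return input_
    return input_, input_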
......@@ -18,8 +18,11 @@ class DenseGraphConv(nn.Block):
Input feature size.
out_feats : int
Output feature size.
norm : bool
If True, the normalizer :math:`c_{ij}` is applied. Default: ``True``.
norm : str, optional
How to apply the normalizer. If `'right'`, divide the aggregated messages
by each node's in-degree, which is equivalent to averaging the received messages.
If `'none'`, no normalization is applied. Default is `'both'`,
where the :math:`c_{ij}` from the paper is applied.
bias : bool
If True, adds a learnable bias to the output. Default: ``True``.
activation : callable activation function/layer or None, optional
......@@ -33,7 +36,7 @@ class DenseGraphConv(nn.Block):
def __init__(self,
in_feats,
out_feats,
norm=True,
norm='both',
bias=True,
activation=None):
super(DenseGraphConv, self).__init__()
......@@ -56,12 +59,14 @@ class DenseGraphConv(nn.Block):
Parameters
----------
adj : mxnet.NDArray
The adjacency matrix of the graph to apply Graph Convolution on,
should be of shape :math:`(N, N)`, where a row represents the destination
and a column represents the source.
feat : mxnet.NDArray
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
The adjacency matrix of the graph to apply Graph Convolution on. When
applied to a unidirectional bipartite graph, ``adj`` should be of shape
:math:`(N_{out}, N_{in})`; when applied to a homogeneous graph,
``adj`` should be of shape :math:`(N, N)`. In both cases,
a row represents a destination node while a column represents a source
node.
feat : mxnet.NDArray
The input feature.
Returns
-------
......@@ -70,24 +75,33 @@ class DenseGraphConv(nn.Block):
is size of output feature.
"""
adj = adj.astype(feat.dtype).as_in_context(feat.context)
if self._norm:
in_degrees = adj.sum(axis=1)
norm = nd.power(in_degrees, -0.5)
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = norm.reshape(shp).as_in_context(feat.context)
feat = feat * norm
src_degrees = nd.clip(adj.sum(axis=0), a_min=1, a_max=float('inf'))
dst_degrees = nd.clip(adj.sum(axis=1), a_min=1, a_max=float('inf'))
feat_src = feat
if self._norm == 'both':
norm_src = nd.power(src_degrees, -0.5)
shp_src = norm_src.shape + (1,) * (feat.ndim - 1)
norm_src = norm_src.reshape(shp_src).as_in_context(feat.context)
feat_src = feat_src * norm_src
if self._in_feats > self._out_feats:
# mult W first to reduce the feature size for aggregation.
feat = nd.dot(feat, self.weight.data(feat.context))
rst = nd.dot(adj, feat)
feat_src = nd.dot(feat_src, self.weight.data(feat_src.context))
rst = nd.dot(adj, feat_src)
else:
# aggregate first then mult W
rst = nd.dot(adj, feat)
rst = nd.dot(rst, self.weight.data(feat.context))
rst = nd.dot(adj, feat_src)
rst = nd.dot(rst, self.weight.data(feat_src.context))
if self._norm:
rst = rst * norm
if self._norm != 'none':
if self._norm == 'both':
norm_dst = nd.power(dst_degrees, -0.5)
else: # right
norm_dst = 1.0 / dst_degrees
shp_dst = norm_dst.shape + (1,) * (feat.ndim - 1)
norm_dst = norm_dst.reshape(shp_dst).as_in_context(feat.context)
rst = rst * norm_dst
if self.bias is not None:
rst = rst + self.bias.data(feat.context)
......
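A hedged NumPy sketch of the two normalization modes applied above, independent of the Gluon block (adjacency and features are toy values):

import numpy as np

adj = np.array([[1., 1., 0.],     # rows: destination nodes
                [0., 1., 1.],     # columns: source nodes
                [1., 0., 1.]])
feat = np.eye(3)

src_deg = np.clip(adj.sum(axis=0), 1, None)
dst_deg = np.clip(adj.sum(axis=1), 1, None)

# norm == 'both': symmetric normalization, the c_ij from the GCN paper.
h_both = (adj @ (feat * (src_deg ** -0.5)[:, None])) * (dst_deg ** -0.5)[:, None]

# norm == 'right': divide by in-degree, i.e. average the received messages.
h_right = (adj @ feat) / dst_deg[:, None]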
......@@ -4,6 +4,7 @@ import math
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn
from ....utils import check_eq_shape
class DenseSAGEConv(nn.Block):
......@@ -56,12 +57,18 @@ class DenseSAGEConv(nn.Block):
Parameters
----------
adj : mxnet.NDArray
The adjacency matrix of the graph to apply Graph Convolution on,
should be of shape :math:`(N, N)`, where a row represents the destination
and a column represents the source.
feat : mxnet.NDArray
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
The adjacency matrix of the graph to apply SAGE Convolution on. When
applied to a unidirectional bipartite graph, ``adj`` should be of shape
:math:`(N_{out}, N_{in})`; when applied to a homogeneous graph,
``adj`` should be of shape :math:`(N, N)`. In both cases,
a row represents a destination node while a column represents a source
node.
feat : mxnet.NDArray or a pair of mxnet.NDArray
If a single mxnet.NDArray is given, the input feature of shape :math:`(N, D_{in})`
where :math:`D_{in}` is size of input feature, :math:`N` is the number of
nodes.
If a pair of mxnet.NDArray is given, the pair must contain two tensors of
shape :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`.
Returns
-------
......@@ -69,10 +76,15 @@ class DenseSAGEConv(nn.Block):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is size of output feature.
"""
adj = adj.astype(feat.dtype).as_in_context(feat.context)
feat = self.feat_drop(feat)
check_eq_shape(feat)
if isinstance(feat, tuple):
feat_src = self.feat_drop(feat[0])
feat_dst = self.feat_drop(feat[1])
else:
feat_src = feat_dst = self.feat_drop(feat)
adj = adj.astype(feat_src.dtype).as_in_context(feat_src.context)
in_degrees = adj.sum(axis=1, keepdims=True)
h_neigh = (nd.dot(adj, feat) + feat) / (in_degrees + 1)
h_neigh = (nd.dot(adj, feat_src) + feat_dst) / (in_degrees + 1)
rst = self.fc(h_neigh)
# activation
if self.activation is not None:
......
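The aggregation above, written as a hedged NumPy sketch for the bipartite case (toy shapes):

import numpy as np

adj = np.array([[0., 1., 1.],     # 2 destination rows, 3 source columns
                [1., 0., 0.]])
feat_src = np.arange(6.).reshape(3, 2)
feat_dst = np.zeros((2, 2))

in_deg = adj.sum(axis=1, keepdims=True)
# Each destination averages its own feature with its source neighbors'.
h_neigh = (adj @ feat_src + feat_dst) / (in_deg + 1)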
......@@ -4,6 +4,7 @@ import mxnet as mx
from mxnet.gluon import nn
from .... import function as fn
from ....utils import expand_as_pair
class EdgeConv(nn.Block):
......@@ -60,17 +61,23 @@ class EdgeConv(nn.Block):
h : mxnet.NDArray or pair of mxnet.NDArray
:math:`(N, D)` where :math:`N` is the number of nodes and
:math:`D` is the number of feature dimensions.
If a pair of tensors is given, the graph must be a uni-bipartite graph
with only one edge type, and the two tensors must have the same
dimensionality on all except the first axis.
Returns
-------
mxnet.NDArray
New node features.
"""
with g.local_scope():
g.ndata['x'] = h
h_src, h_dst = expand_as_pair(h)
g.srcdata['x'] = h_src
g.dstdata['x'] = h_dst
if not self.batch_norm:
g.update_all(self.message, fn.max('e', 'x'))
else:
g.apply_edges(self.message)
g.edata['e'] = self.bn(g.edata['e'])
g.update_all(fn.copy_e('e', 'm'), fn.max('m', 'x'))
return g.ndata['x']
return g.dstdata['x']
......@@ -7,6 +7,7 @@ from mxnet.gluon.contrib.nn import Identity
from .... import function as fn
from ..softmax import edge_softmax
from ....utils import expand_as_pair
#pylint: enable=W0235
class GATConv(nn.Block):
......@@ -26,8 +27,13 @@ class GATConv(nn.Block):
Parameters
----------
in_feats : int
in_feats : int or pair of ints
Input feature size.
If the layer is to be applied to a unidirectional bipartite graph, ``in_feats``
specifies the input feature size on both the source and destination nodes. If
a scalar is given, the source and destination node feature size would take the
same value.
out_feats : int
Output feature size.
num_heads : int
......@@ -55,9 +61,18 @@ class GATConv(nn.Block):
activation=None):
super(GATConv, self).__init__()
self._num_heads = num_heads
self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
self._in_feats = in_feats
self._out_feats = out_feats
with self.name_scope():
if isinstance(in_feats, tuple):
self.fc_src = nn.Dense(out_feats * num_heads, use_bias=False,
weight_initializer=mx.init.Xavier(magnitude=math.sqrt(2.0)),
in_units=self._in_src_feats)
self.fc_dst = nn.Dense(out_feats * num_heads, use_bias=False,
weight_initializer=mx.init.Xavier(magnitude=math.sqrt(2.0)),
in_units=self._in_dst_feats)
else:
self.fc = nn.Dense(out_feats * num_heads, use_bias=False,
weight_initializer=mx.init.Xavier(magnitude=math.sqrt(2.0)),
in_units=in_feats)
......@@ -90,8 +105,11 @@ class GATConv(nn.Block):
graph : DGLGraph
The graph.
feat : mxnet.NDArray or pair of mxnet.NDArray
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
If a mxnet.NDArray is given, the input feature of shape :math:`(N, D_{in})`
where :math:`D_{in}` is size of input feature, :math:`N` is the number of
nodes.
If a pair of mxnet.NDArray is given, the pair must contain two tensors of
shape :math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
Returns
-------
......@@ -100,8 +118,17 @@ class GATConv(nn.Block):
is the number of heads, and :math:`D_{out}` is size of output feature.
"""
graph = graph.local_var()
h = self.feat_drop(feat)
feat = self.fc(h).reshape(-1, self._num_heads, self._out_feats)
if isinstance(feat, tuple):
h_src = self.feat_drop(feat[0])
h_dst = self.feat_drop(feat[1])
feat_src = self.fc_src(h_src).reshape(
-1, self._num_heads, self._out_feats)
feat_dst = self.fc_dst(h_dst).reshape(
-1, self._num_heads, self._out_feats)
else:
h_src = h_dst = self.feat_drop(feat)
feat_src = feat_dst = self.fc(h_src).reshape(
-1, self._num_heads, self._out_feats)
# NOTE: GAT paper uses "first concatenation then linear projection"
# to compute attention scores, while ours is "first projection then
# addition", the two approaches are mathematically equivalent:
......@@ -112,9 +139,10 @@ class GATConv(nn.Block):
# save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
# addition could be optimized with DGL's built-in function u_add_v,
# which further speeds up computation and saves memory footprint.
el = (feat * self.attn_l.data(feat.context)).sum(axis=-1).expand_dims(-1)
er = (feat * self.attn_r.data(feat.context)).sum(axis=-1).expand_dims(-1)
graph.ndata.update({'ft': feat, 'el': el, 'er': er})
el = (feat_src * self.attn_l.data(feat_src.context)).sum(axis=-1).expand_dims(-1)
er = (feat_dst * self.attn_r.data(feat_src.context)).sum(axis=-1).expand_dims(-1)
graph.srcdata.update({'ft': feat_src, 'el': el})
graph.dstdata.update({'er': er})
# compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = self.leaky_relu(graph.edata.pop('e'))
......@@ -122,10 +150,10 @@ class GATConv(nn.Block):
graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft'))
rst = graph.ndata['ft']
rst = graph.dstdata['ft']
# residual
if self.res_fc is not None:
resval = self.res_fc(h).reshape(h.shape[0], -1, self._out_feats)
resval = self.res_fc(h_dst).reshape(h_dst.shape[0], -1, self._out_feats)
rst = rst + resval
# activation
if self.activation:
......
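A hedged usage sketch of the new bipartite path; the dgl.bipartite constructor, the type names, and the shapes are illustrative assumptions, not taken from this diff.

import mxnet as mx
import dgl
from dgl.nn.mxnet import GATConv

# 3 'user' source nodes and 2 'game' destination nodes.
g = dgl.bipartite([(0, 0), (1, 0), (2, 1)], 'user', 'plays', 'game')
conv = GATConv(in_feats=(5, 10), out_feats=4, num_heads=2)
conv.initialize()

u_feat = mx.nd.random.normal(shape=(3, 5))    # (N_in, D_in_src)
v_feat = mx.nd.random.normal(shape=(2, 10))   # (N_out, D_in_dst)
out = conv(g, (u_feat, v_feat))               # (N_out, num_heads, out_feats) == (2, 2, 4)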
......@@ -75,6 +75,8 @@ class GatedGraphConv(nn.Block):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the output feature size.
"""
assert graph.is_homograph(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
graph = graph.local_var()
zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]), ctx=feat.context)
feat = nd.concat(feat, zero_pad, dim=-1)
......
......@@ -4,6 +4,7 @@ import mxnet as mx
from mxnet.gluon import nn
from .... import function as fn
from ....utils import expand_as_pair
class GINConv(nn.Block):
......@@ -56,24 +57,28 @@ class GINConv(nn.Block):
----------
graph : DGLGraph
The graph.
feat : torch.Tensor
The input feature of shape :math:`(N, D)` where :math:`D`
could be any positive integer, :math:`N` is the number
of nodes. If ``apply_func`` is not None, :math:`D` should
feat : mxnet.NDArray or a pair of mxnet.NDArray
If a mxnet.NDArray is given, the input feature of shape :math:`(N, D_{in})`
where :math:`D_{in}` is size of input feature, :math:`N` is the number of
nodes.
If a pair of mxnet.NDArray is given, the pair must contain two tensors of
shape :math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`.
If ``apply_func`` is not None, :math:`D_{in}` should
fit the input dimensionality requirement of ``apply_func``.
Returns
-------
torch.Tensor
mxnet.NDArray
The output feature of shape :math:`(N, D_{out})` where
:math:`D_{out}` is the output dimensionality of ``apply_func``.
If ``apply_func`` is None, :math:`D_{out}` should be the same
as input dimensionality.
"""
graph = graph.local_var()
graph.ndata['h'] = feat
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_u('h', 'm'), self._reducer('m', 'neigh'))
rst = (1 + self.eps.data(feat.context)) * feat + graph.ndata['neigh']
rst = (1 + self.eps.data(feat_dst.context)) * feat_dst + graph.dstdata['neigh']
if self.apply_func is not None:
rst = self.apply_func(rst)
return rst
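The GIN update performed above, as a hedged NumPy sketch for the bipartite case (sum aggregation, toy values):

import numpy as np

adj = np.array([[1., 0., 1.],     # 2 destination rows, 3 source columns
                [0., 1., 1.]])
feat_src = np.ones((3, 4))
feat_dst = np.zeros((2, 4))
eps = 0.1

neigh = adj @ feat_src                  # sum over source neighbors
rst = (1 + eps) * feat_dst + neigh      # apply_func (the MLP) would act on rst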
......@@ -7,6 +7,7 @@ from mxnet.gluon import nn
from mxnet.gluon.contrib.nn import Identity
from .... import function as fn
from ....utils import expand_as_pair
class GMMConv(nn.Block):
......@@ -22,8 +23,13 @@ class GMMConv(nn.Block):
Parameters
----------
in_feats : int
in_feats : int, or pair of ints
Number of input features.
If the layer is to be applied on a unidirectional bipartite graph, ``in_feats``
specifies the input feature size on both the source and destination nodes. If
a scalar is given, the source and destination node feature size would take the
same value.
out_feats : int
Number of output features.
dim : int
......@@ -46,7 +52,8 @@ class GMMConv(nn.Block):
residual=False,
bias=True):
super(GMMConv, self).__init__()
self._in_feats = in_feats
self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
self._out_feats = out_feats
self._dim = dim
self._n_kernels = n_kernels
......@@ -67,12 +74,12 @@ class GMMConv(nn.Block):
shape=(n_kernels, dim),
init=mx.init.Constant(1))
self.fc = nn.Dense(n_kernels * out_feats,
in_units=in_feats,
in_units=self._in_src_feats,
use_bias=False,
weight_initializer=mx.init.Xavier(magnitude=math.sqrt(2.0)))
if residual:
if in_feats != out_feats:
self.res_fc = nn.Dense(out_feats, in_units=in_feats, use_bias=False)
if self._in_dst_feats != out_feats:
self.res_fc = nn.Dense(out_feats, in_units=self._in_dst_feats, use_bias=False)
else:
self.res_fc = Identity()
else:
......@@ -93,9 +100,10 @@ class GMMConv(nn.Block):
graph : DGLGraph
The graph.
feat : mxnet.NDArray or pair of mxnet.NDArray
The input feature of shape :math:`(N, D_{in})` where :math:`N`
is the number of nodes of the graph and :math:`D_{in}` is the
input feature size.
If a single tensor is given, the input feature of shape :math:`(N, D_{in})` where
:math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
If a pair of tensors is given, the pair must contain two tensors of shape
:math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
pseudo : mxnet.NDArray
The pseudo coordinate tensor of shape :math:`(E, D_{u})` where
:math:`E` is the number of edges of the graph and :math:`D_{u}`
......@@ -107,22 +115,26 @@ class GMMConv(nn.Block):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the output feature size.
"""
graph = graph.local_var()
graph.ndata['h'] = self.fc(feat).reshape(-1, self._n_kernels, self._out_feats)
feat_src, feat_dst = expand_as_pair(feat)
with graph.local_scope():
graph.srcdata['h'] = self.fc(feat_src).reshape(
-1, self._n_kernels, self._out_feats)
E = graph.number_of_edges()
# compute gaussian weight
gaussian = -0.5 * ((pseudo.reshape(E, 1, self._dim) -
self.mu.data(feat.context).reshape(1, self._n_kernels, self._dim)) ** 2)
self.mu.data(feat_src.context)
.reshape(1, self._n_kernels, self._dim)) ** 2)
gaussian = gaussian *\
(self.inv_sigma.data(feat.context).reshape(1, self._n_kernels, self._dim) ** 2)
(self.inv_sigma.data(feat_src.context)
.reshape(1, self._n_kernels, self._dim) ** 2)
gaussian = nd.exp(gaussian.sum(axis=-1, keepdims=True)) # (E, K, 1)
graph.edata['w'] = gaussian
graph.update_all(fn.u_mul_e('h', 'w', 'm'), self._reducer('m', 'h'))
rst = graph.ndata['h'].sum(1)
rst = graph.dstdata['h'].sum(1)
# residual connection
if self.res_fc is not None:
rst = rst + self.res_fc(feat)
rst = rst + self.res_fc(feat_dst)
# bias
if self.bias is not None:
rst = rst + self.bias.data(feat.context)
rst = rst + self.bias.data(feat_dst.context)
return rst
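A hedged NumPy sketch of the per-edge Gaussian kernel weights computed above (toy sizes; mu and inv_sigma stand in for the learnable parameters):

import numpy as np

E, K, D = 4, 3, 2                      # edges, kernels, pseudo-coordinate dim
pseudo = np.random.rand(E, D)
mu = np.random.rand(K, D)
inv_sigma = np.ones((K, D))

diff = pseudo[:, None, :] - mu[None, :, :]           # (E, K, D)
gaussian = -0.5 * (diff ** 2) * (inv_sigma ** 2)
w = np.exp(gaussian.sum(axis=-1, keepdims=True))     # (E, K, 1), i.e. graph.edata['w']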
......@@ -110,7 +110,7 @@ class GraphConv(gluon.Block):
graph : DGLGraph
The graph.
feat : mxnet.NDArray
The input feature
The input feature.
weight : mxnet.NDArray, optional
Optional external weight tensor.
......
......@@ -5,6 +5,7 @@ from mxnet.gluon import nn
from mxnet.gluon.contrib.nn import Identity
from .... import function as fn
from ....utils import expand_as_pair
class NNConv(nn.Block):
......@@ -17,8 +18,13 @@ class NNConv(nn.Block):
Parameters
----------
in_feats : int
in_feats : int or pair of ints
Input feature size.
If the layer is to be applied on a unidirectional bipartite graph, ``in_feats``
specifies the input feature size on both the source and destination nodes. If
a scalar is given, the source and destination node feature size would take the
same value.
out_feats : int
Output feature size.
edge_func : callable activation function/layer
......@@ -41,7 +47,7 @@ class NNConv(nn.Block):
residual=False,
bias=True):
super(NNConv, self).__init__()
self._in_feats = in_feats
self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
self._out_feats = out_feats
if aggregator_type == 'sum':
self.reducer = fn.sum
......@@ -56,9 +62,10 @@ class NNConv(nn.Block):
with self.name_scope():
self.edge_nn = edge_func
if residual:
if in_feats != out_feats:
self.res_fc = nn.Dense(out_feats, in_units=in_feats, use_bias=False,
weight_initializer=mx.init.Xavier())
if self._in_dst_feats != out_feats:
self.res_fc = nn.Dense(
out_feats, in_units=self._in_dst_feats,
use_bias=False, weight_initializer=mx.init.Xavier())
else:
self.res_fc = Identity()
else:
......@@ -78,7 +85,7 @@ class NNConv(nn.Block):
----------
graph : DGLGraph
The graph.
feat : mxnet.NDArray
feat : mxnet.NDArray or pair of mxnet.NDArray
The input feature of shape :math:`(N, D_{in})` where :math:`N`
is the number of nodes of the graph and :math:`D_{in}` is the
input feature size.
......@@ -92,18 +99,20 @@ class NNConv(nn.Block):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the output feature size.
"""
graph = graph.local_var()
with graph.local_scope():
feat_src, feat_dst = expand_as_pair(feat)
# (n, d_in, 1)
graph.ndata['h'] = feat.expand_dims(-1)
graph.srcdata['h'] = feat_src.expand_dims(-1)
# (n, d_in, d_out)
graph.edata['w'] = self.edge_nn(efeat).reshape(-1, self._in_feats, self._out_feats)
graph.edata['w'] = self.edge_nn(efeat).reshape(-1, self._in_src_feats, self._out_feats)
# (n, d_in, d_out)
graph.update_all(fn.u_mul_e('h', 'w', 'm'), self.reducer('m', 'neigh'))
rst = graph.ndata.pop('neigh').sum(axis=1) # (n, d_out)
rst = graph.dstdata.pop('neigh').sum(axis=1) # (n, d_out)
# residual connection
if self.res_fc is not None:
rst = rst + self.res_fc(feat)
rst = rst + self.res_fc(feat_dst)
# bias
if self.bias is not None:
rst = rst + self.bias.data(feat.context)
rst = rst + self.bias.data(feat_dst.context)
return rst
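A hedged NumPy sketch of the edge-conditioned message above: edge_nn maps each edge feature to a (d_in_src, d_out) matrix that weights the source feature copied onto that edge (toy sizes):

import numpy as np

E, d_in, d_out = 5, 3, 4
h_src_on_edges = np.random.rand(E, d_in, 1)      # u feature per edge, after expand_dims(-1)
edge_w = np.random.rand(E, d_in, d_out)          # edge_nn(efeat) reshaped

msg = h_src_on_edges * edge_w                    # (E, d_in, d_out) per-edge message
# The reducer then aggregates messages per destination node, and the result
# is summed over the d_in axis to give (N_out, d_out).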
......@@ -175,7 +175,9 @@ class RelGraphConv(gluon.Block):
mx.ndarray.NDArray
New node features.
"""
g = g.local_var()
assert g.is_homograph(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
with g.local_scope():
g.ndata['h'] = x
g.edata['type'] = etypes
if norm is not None:
......
"""MXNet Module for GraphSAGE layer"""
# pylint: disable= no-member, arguments-differ, invalid-name
import math
from numbers import Integral
import mxnet as mx
from mxnet import nd
from mxnet.gluon import nn
from .... import function as fn
from ....utils import expand_as_pair, check_eq_shape
class SAGEConv(nn.Block):
r"""GraphSAGE layer from paper `Inductive Representation Learning on
......@@ -57,14 +57,7 @@ class SAGEConv(nn.Block):
activation=None):
super(SAGEConv, self).__init__()
if isinstance(in_feats, tuple):
self._in_src_feats = in_feats[0]
self._in_dst_feats = in_feats[1]
elif isinstance(in_feats, Integral):
self._in_src_feats = self._in_dst_feats = in_feats
else:
raise TypeError('in_feats must be either int or pair of ints')
self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
self._out_feats = out_feats
self._aggre_type = aggregator_type
with self.name_scope():
......@@ -92,9 +85,11 @@ class SAGEConv(nn.Block):
----------
graph : DGLGraph
The graph.
feat : mxnet.NDArray
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
feat : mxnet.NDArray or pair of mxnet.NDArray
If a single tensor is given, the input feature of shape :math:`(N, D_{in})` where
:math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
If a pair of tensors is given, the pair must contain two tensors of shape
:math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
Returns
-------
......@@ -117,6 +112,7 @@ class SAGEConv(nn.Block):
graph.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'gcn':
check_eq_shape(feat)
graph.srcdata['h'] = feat_src
graph.dstdata['h'] = feat_dst # same as above if homogeneous
graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'neigh'))
......
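The 'gcn' aggregator relies on check_eq_shape; a minimal sketch of its assumed contract (not the actual dgl.utils implementation):

def check_eq_shape(feat):
    # The 'gcn' aggregator adds the destination's own feature to the summed
    # source features, so a (src, dst) pair must agree on every axis but
    # the first.
    if isinstance(feat, tuple) and feat[0].shape[1:] != feat[1].shape[1:]:
        raise ValueError('expect source and destination features '
                         'to have the same trailing shape')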
......@@ -76,6 +76,7 @@ class TAGConv(gluon.Block):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is size of output feature.
"""
assert graph.is_homograph(), 'Graph is not homogeneous'
graph = graph.local_var()
degs = graph.in_degrees().astype('float32')
......
......@@ -6,6 +6,7 @@ from torch.nn import functional as F
from .... import function as fn
from ..softmax import edge_softmax
from ....utils import expand_as_pair
class AGNNConv(nn.Module):
......@@ -47,6 +48,9 @@ class AGNNConv(nn.Module):
feat : torch.Tensor or pair of torch.Tensor
The input feature of shape :math:`(N, *)` where :math:`N` is the
number of nodes and :math:`*` could be of any shape.
If a pair of torch.Tensor is given, the pair must contain two tensors of shape
:math:`(N_{in}, *)` and :math:`(N_{out}, *)`, and the :math:`*` in the latter
tensor must equal that in the former.
Returns
-------
......@@ -55,12 +59,16 @@ class AGNNConv(nn.Module):
should be the same as input shape.
"""
graph = graph.local_var()
graph.ndata['h'] = feat
graph.ndata['norm_h'] = F.normalize(feat, p=2, dim=-1)
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.srcdata['norm_h'] = F.normalize(feat_src, p=2, dim=-1)
if isinstance(feat, tuple):
graph.dstdata['norm_h'] = F.normalize(feat_dst, p=2, dim=-1)
# compute cosine distance
graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
cos = graph.edata.pop('cos')
e = self.beta * cos
graph.edata['p'] = edge_softmax(graph, e)
graph.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
return graph.ndata.pop('h')
return graph.dstdata.pop('h')
......@@ -17,8 +17,11 @@ class DenseGraphConv(nn.Module):
Input feature size.
out_feats : int
Output feature size.
norm : bool
If True, the normalizer :math:`c_{ij}` is applied. Default: ``True``.
norm : str, optional
How to apply the normalizer. If `'right'`, divide the aggregated messages
by each node's in-degree, which is equivalent to averaging the received messages.
If `'none'`, no normalization is applied. Default is `'both'`,
where the :math:`c_{ij}` from the paper is applied.
bias : bool
If True, adds a learnable bias to the output. Default: ``True``.
activation : callable activation function/layer or None, optional
......@@ -32,7 +35,7 @@ class DenseGraphConv(nn.Module):
def __init__(self,
in_feats,
out_feats,
norm=True,
norm='both',
bias=True,
activation=None):
super(DenseGraphConv, self).__init__()
......@@ -60,12 +63,14 @@ class DenseGraphConv(nn.Module):
Parameters
----------
adj : torch.Tensor
The adjacency matrix of the graph to apply Graph Convolution on,
should be of shape :math:`(N, N)`, where a row represents the destination
and a column represents the source.
The adjacency matrix of the graph to apply Graph Convolution on. When
applied to a unidirectional bipartite graph, ``adj`` should be of shape
:math:`(N_{out}, N_{in})`; when applied to a homogeneous graph,
``adj`` should be of shape :math:`(N, N)`. In both cases,
a row represents a destination node while a column represents a source
node.
feat : torch.Tensor
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
The input feature.
Returns
-------
......@@ -74,24 +79,33 @@ class DenseGraphConv(nn.Module):
is size of output feature.
"""
adj = adj.float().to(feat.device)
if self._norm:
in_degrees = adj.sum(dim=1)
norm = th.pow(in_degrees, -0.5)
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp).to(feat.device)
feat = feat * norm
src_degrees = adj.sum(dim=0).clamp(min=1)
dst_degrees = adj.sum(dim=1).clamp(min=1)
feat_src = feat
if self._norm == 'both':
norm_src = th.pow(src_degrees, -0.5)
shp = norm_src.shape + (1,) * (feat.dim() - 1)
norm_src = th.reshape(norm_src, shp).to(feat.device)
feat_src = feat_src * norm_src
if self._in_feats > self._out_feats:
# mult W first to reduce the feature size for aggregation.
feat = th.matmul(feat, self.weight)
rst = adj @ feat
feat_src = th.matmul(feat_src, self.weight)
rst = adj @ feat_src
else:
# aggregate first then mult W
rst = adj @ feat
rst = adj @ feat_src
rst = th.matmul(rst, self.weight)
if self._norm:
rst = rst * norm
if self._norm != 'none':
if self._norm == 'both':
norm_dst = th.pow(dst_degrees, -0.5)
else: # right
norm_dst = 1.0 / dst_degrees
shp = norm_dst.shape + (1,) * (feat.dim() - 1)
norm_dst = th.reshape(norm_dst, shp).to(feat.device)
rst = rst * norm_dst
if self.bias is not None:
rst = rst + self.bias
......
"""Torch Module for DenseSAGEConv"""
# pylint: disable= no-member, arguments-differ, invalid-name
from torch import nn
from ....utils import check_eq_shape
class DenseSAGEConv(nn.Module):
......@@ -57,12 +58,17 @@ class DenseSAGEConv(nn.Module):
Parameters
----------
adj : torch.Tensor
The adjacency matrix of the graph to apply Graph Convolution on,
should be of shape :math:`(N, N)`, where a row represents the destination
and a column represents the source.
feat : torch.Tensor
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
The adjacency matrix of the graph to apply SAGE Convolution on. When
applied to a unidirectional bipartite graph, ``adj`` should be of shape
:math:`(N_{out}, N_{in})`; when applied to a homogeneous graph,
``adj`` should be of shape :math:`(N, N)`. In both cases,
a row represents a destination node while a column represents a source
node.
feat : torch.Tensor or a pair of torch.Tensor
If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
:math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
If a pair of torch.Tensor is given, the pair must contain two tensors of shape
:math:`(N_{in}, D_{in})` and :math:`(N_{out}, D_{in})`.
Returns
-------
......@@ -70,10 +76,15 @@ class DenseSAGEConv(nn.Module):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is size of output feature.
"""
adj = adj.float().to(feat.device)
feat = self.feat_drop(feat)
check_eq_shape(feat)
if isinstance(feat, tuple):
feat_src = self.feat_drop(feat[0])
feat_dst = self.feat_drop(feat[1])
else:
feat_src = feat_dst = self.feat_drop(feat)
adj = adj.float().to(feat_src.device)
in_degrees = adj.sum(dim=1, keepdim=True)
h_neigh = (adj @ feat + feat) / (in_degrees + 1)
h_neigh = (adj @ feat_src + feat_dst) / (in_degrees + 1)
rst = self.fc(h_neigh)
# activation
if self.activation is not None:
......
......@@ -3,6 +3,7 @@
from torch import nn
from .... import function as fn
from ....utils import expand_as_pair
class EdgeConv(nn.Module):
......@@ -53,16 +54,22 @@ class EdgeConv(nn.Module):
----------
g : DGLGraph
The graph.
h : Tensor
h : Tensor or pair of tensors
:math:`(N, D)` where :math:`N` is the number of nodes and
:math:`D` is the number of feature dimensions.
If a pair of tensors is given, the graph must be a uni-bipartite graph
with only one edge type, and the two tensors must have the same
dimensionality on all except the first axis.
Returns
-------
torch.Tensor
New node features.
"""
with g.local_scope():
g.ndata['x'] = h
h_src, h_dst = expand_as_pair(h)
g.srcdata['x'] = h_src
g.dstdata['x'] = h_dst
if not self.batch_norm:
g.update_all(self.message, fn.max('e', 'x'))
else:
......@@ -88,4 +95,4 @@ class EdgeConv(nn.Module):
# images.
g.edata['e'] = self.bn(g.edata['e'])
g.update_all(fn.copy_e('e', 'e'), fn.max('e', 'x'))
return g.ndata['x']
return g.dstdata['x']
......@@ -6,6 +6,7 @@ from torch import nn
from .... import function as fn
from ..softmax import edge_softmax
from ..utils import Identity
from ....utils import expand_as_pair
# pylint: enable=W0235
class GATConv(nn.Module):
......@@ -25,8 +26,13 @@ class GATConv(nn.Module):
Parameters
----------
in_feats : int
in_feats : int, or pair of ints
Input feature size.
If the layer is to be applied to a unidirectional bipartite graph, ``in_feats``
specifies the input feature size on both the source and destination nodes. If
a scalar is given, the source and destination node feature size would take the
same value.
out_feats : int
Output feature size.
num_heads : int
......@@ -54,17 +60,25 @@ class GATConv(nn.Module):
activation=None):
super(GATConv, self).__init__()
self._num_heads = num_heads
self._in_feats = in_feats
self._in_src_feats, self._in_dst_feats = expand_as_pair(in_feats)
self._out_feats = out_feats
self.fc = nn.Linear(in_feats, out_feats * num_heads, bias=False)
if isinstance(in_feats, tuple):
self.fc_src = nn.Linear(
self._in_src_feats, out_feats * num_heads, bias=False)
self.fc_dst = nn.Linear(
self._in_dst_feats, out_feats * num_heads, bias=False)
else:
self.fc = nn.Linear(
self._in_src_feats, out_feats * num_heads, bias=False)
self.attn_l = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
self.attn_r = nn.Parameter(th.FloatTensor(size=(1, num_heads, out_feats)))
self.feat_drop = nn.Dropout(feat_drop)
self.attn_drop = nn.Dropout(attn_drop)
self.leaky_relu = nn.LeakyReLU(negative_slope)
if residual:
if in_feats != out_feats:
self.res_fc = nn.Linear(in_feats, num_heads * out_feats, bias=False)
if self._in_dst_feats != out_feats:
self.res_fc = nn.Linear(
self._in_dst_feats, num_heads * out_feats, bias=False)
else:
self.res_fc = Identity()
else:
......@@ -75,7 +89,11 @@ class GATConv(nn.Module):
def reset_parameters(self):
"""Reinitialize learnable parameters."""
gain = nn.init.calculate_gain('relu')
if hasattr(self, 'fc'):
nn.init.xavier_normal_(self.fc.weight, gain=gain)
else: # bipartite graph neural networks
nn.init.xavier_normal_(self.fc_src.weight, gain=gain)
nn.init.xavier_normal_(self.fc_dst.weight, gain=gain)
nn.init.xavier_normal_(self.attn_l, gain=gain)
nn.init.xavier_normal_(self.attn_r, gain=gain)
if isinstance(self.res_fc, nn.Linear):
......@@ -88,9 +106,11 @@ class GATConv(nn.Module):
----------
graph : DGLGraph
The graph.
feat : torch.Tensor
The input feature of shape :math:`(N, D_{in})` where :math:`D_{in}`
is size of input feature, :math:`N` is the number of nodes.
feat : torch.Tensor or pair of torch.Tensor
If a torch.Tensor is given, the input feature of shape :math:`(N, D_{in})` where
:math:`D_{in}` is size of input feature, :math:`N` is the number of nodes.
If a pair of torch.Tensor is given, the pair must contain two tensors of shape
:math:`(N_{in}, D_{in_{src}})` and :math:`(N_{out}, D_{in_{dst}})`.
Returns
-------
......@@ -99,8 +119,15 @@ class GATConv(nn.Module):
is the number of heads, and :math:`D_{out}` is size of output feature.
"""
graph = graph.local_var()
h = self.feat_drop(feat)
feat = self.fc(h).view(-1, self._num_heads, self._out_feats)
if isinstance(feat, tuple):
h_src = self.feat_drop(feat[0])
h_dst = self.feat_drop(feat[1])
feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
else:
h_src = h_dst = self.feat_drop(feat)
feat_src = feat_dst = self.fc(h_src).view(
-1, self._num_heads, self._out_feats)
# NOTE: GAT paper uses "first concatenation then linear projection"
# to compute attention scores, while ours is "first projection then
# addition", the two approaches are mathematically equivalent:
......@@ -111,9 +138,10 @@ class GATConv(nn.Module):
# save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
# addition could be optimized with DGL's built-in function u_add_v,
# which further speeds up computation and saves memory footprint.
el = (feat * self.attn_l).sum(dim=-1).unsqueeze(-1)
er = (feat * self.attn_r).sum(dim=-1).unsqueeze(-1)
graph.ndata.update({'ft': feat, 'el': el, 'er': er})
el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
graph.srcdata.update({'ft': feat_src, 'el': el})
graph.dstdata.update({'er': er})
# compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = self.leaky_relu(graph.edata.pop('e'))
......@@ -122,10 +150,10 @@ class GATConv(nn.Module):
# message passing
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft'))
rst = graph.ndata['ft']
rst = graph.dstdata['ft']
# residual
if self.res_fc is not None:
resval = self.res_fc(h).view(h.shape[0], -1, self._out_feats)
resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
rst = rst + resval
# activation
if self.activation:
......
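A hedged PyTorch usage sketch mirroring the MXNet one earlier in this diff; the graph constructor and the shapes are illustrative assumptions.

import torch
import dgl
from dgl.nn.pytorch import GATConv

g = dgl.bipartite([(0, 0), (1, 0), (2, 1)], 'user', 'plays', 'game')
conv = GATConv(in_feats=(5, 10), out_feats=4, num_heads=2)

u_feat = torch.randn(3, 5)        # (N_in, D_in_src)
v_feat = torch.randn(2, 10)       # (N_out, D_in_dst)
out = conv(g, (u_feat, v_feat))   # (N_out, num_heads, out_feats) == (2, 2, 4)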