"examples/mxnet/vscode:/vscode.git/clone" did not exist on "b1840f49fae30d6830370d36c3c6af4881c60927"
Unverified Commit f25bc176 authored by Minjie Wang's avatar Minjie Wang Committed by GitHub
Browse files

[Hetero] Improve speed of several Hetero APIs (#1486)

* add clone function to frame

* add utest

* replace all local_var with local_scope

* fix utest

* avoid creating canonical types in __getitem__

* lint

* try another utest approach for mx

* utest
parent 3c4506e9
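
Most of the hunks below make the same mechanical change: instead of taking a mutable copy via graph.local_var(), the modules now enter the graph.local_scope() context manager, so scratch node/edge fields are dropped when the block exits without materializing a new graph object on every forward call. A minimal sketch of the pattern follows; the toy graph and feature names are illustrative only and not part of this commit:

import dgl
import dgl.function as fn
import torch as th

g = dgl.DGLGraph()
g.add_nodes(3)
g.add_edges([0, 1, 2], [1, 2, 0])
feat = th.ones(3, 4)

# old style: work on the shallow copy returned by local_var()
#   graph = g.local_var()
#   graph.ndata['h'] = feat
#   ...

# new style: mutate g directly inside local_scope(); the temporary
# 'h' and 'm' fields are rolled back when the block exits
with g.local_scope():
    g.ndata['h'] = feat
    g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
    out = g.ndata['h']

assert 'h' not in g.ndata  # the scratch field did not leak out
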
......@@ -128,13 +128,13 @@ class Sequential(gluon.nn.Sequential):
>>> def __init__(self, **kwargs):
>>> super().__init__(**kwargs)
>>> def forward(self, graph, n_feat, e_feat):
>>> graph = graph.local_var()
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
>>> e_feat += graph.edata['e']
>>> return n_feat, e_feat
>>> with graph.local_scope():
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
>>> e_feat += graph.edata['e']
>>> return n_feat, e_feat
>>>
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
......@@ -175,11 +175,11 @@ class Sequential(gluon.nn.Sequential):
>>> def __init__(self, **kwargs):
>>> super().__init__(**kwargs)
>>> def forward(self, graph, n_feat):
>>> graph = graph.local_var()
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> return n_feat.reshape(graph.number_of_nodes() // 2, 2, -1).sum(1)
>>> with graph.local_scope():
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> return n_feat.reshape(graph.number_of_nodes() // 2, 2, -1).sum(1)
>>>
>>> g1 = dgl.DGLGraph(nx.erdos_renyi_graph(32, 0.05))
>>> g2 = dgl.DGLGraph(nx.erdos_renyi_graph(16, 0.2))
......
......@@ -58,17 +58,16 @@ class AGNNConv(nn.Module):
The output feature of shape :math:`(N, *)` where :math:`*`
should be the same as input shape.
"""
graph = graph.local_var()
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.srcdata['norm_h'] = F.normalize(feat_src, p=2, dim=-1)
if isinstance(feat, tuple):
graph.dstdata['norm_h'] = F.normalize(feat_dst, p=2, dim=-1)
# compute cosine distance
graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
cos = graph.edata.pop('cos')
e = self.beta * cos
graph.edata['p'] = edge_softmax(graph, e)
graph.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
return graph.dstdata.pop('h')
with graph.local_scope():
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.srcdata['norm_h'] = F.normalize(feat_src, p=2, dim=-1)
if isinstance(feat, tuple):
graph.dstdata['norm_h'] = F.normalize(feat_dst, p=2, dim=-1)
# compute cosine distance
graph.apply_edges(fn.u_dot_v('norm_h', 'norm_h', 'cos'))
cos = graph.edata.pop('cos')
e = self.beta * cos
graph.edata['p'] = edge_softmax(graph, e)
graph.update_all(fn.u_mul_e('h', 'p', 'm'), fn.sum('m', 'h'))
return graph.dstdata.pop('h')
......@@ -53,21 +53,21 @@ class APPNPConv(nn.Module):
The output feature of shape :math:`(N, *)` where :math:`*`
should be the same as input shape.
"""
graph = graph.local_var()
norm = th.pow(graph.in_degrees().float().clamp(min=1), -0.5)
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp).to(feat.device)
feat_0 = feat
for _ in range(self._k):
# normalization by src node
feat = feat * norm
graph.ndata['h'] = feat
graph.edata['w'] = self.edge_drop(
th.ones(graph.number_of_edges(), 1).to(feat.device))
graph.update_all(fn.u_mul_e('h', 'w', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
# normalization by dst node
feat = feat * norm
feat = (1 - self._alpha) * feat + self._alpha * feat_0
return feat
with graph.local_scope():
norm = th.pow(graph.in_degrees().float().clamp(min=1), -0.5)
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp).to(feat.device)
feat_0 = feat
for _ in range(self._k):
# normalization by src node
feat = feat * norm
graph.ndata['h'] = feat
graph.edata['w'] = self.edge_drop(
th.ones(graph.number_of_edges(), 1).to(feat.device))
graph.update_all(fn.u_mul_e('h', 'w', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
# normalization by dst node
feat = feat * norm
feat = (1 - self._alpha) * feat + self._alpha * feat_0
return feat
......@@ -217,12 +217,12 @@ class AtomicConv(nn.Module):
Updated node representations. V for the number of nodes, K for the
number of radial filters, and T for the number of types of atomic numbers.
"""
radial_pooled_values = self.radial_pooling(distances) # (K, E, 1)
graph = graph.local_var()
if self.features_to_use is not None:
feat = (feat == self.features_to_use).float() # (V, T)
graph.ndata['hv'] = feat
graph.edata['he'] = radial_pooled_values.transpose(1, 0).squeeze(-1) # (E, K)
graph.update_all(msg_func, reduce_func)
return graph.ndata['hv_new'].view(graph.number_of_nodes(), -1) # (V, K * T)
with graph.local_scope():
radial_pooled_values = self.radial_pooling(distances) # (K, E, 1)
if self.features_to_use is not None:
feat = (feat == self.features_to_use).float() # (V, T)
graph.ndata['hv'] = feat
graph.edata['he'] = radial_pooled_values.transpose(1, 0).squeeze(-1) # (E, K)
graph.update_all(msg_func, reduce_func)
return graph.ndata['hv_new'].view(graph.number_of_nodes(), -1) # (V, K * T)
......@@ -90,8 +90,8 @@ class CFConv(nn.Module):
float32 tensor of shape (V, out_feats)
Updated node representations.
"""
g = g.local_var()
g.ndata['hv'] = self.project_node(node_feats)
g.edata['he'] = self.project_edge(edge_feats)
g.update_all(fn.u_mul_e('hv', 'he', 'm'), fn.sum('m', 'h'))
return self.project_out(g.ndata['h'])
with g.local_scope():
g.ndata['hv'] = self.project_node(node_feats)
g.edata['he'] = self.project_edge(edge_feats)
g.update_all(fn.u_mul_e('hv', 'he', 'm'), fn.sum('m', 'h'))
return self.project_out(g.ndata['h'])
......@@ -118,44 +118,44 @@ class GATConv(nn.Module):
The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
is the number of heads, and :math:`D_{out}` is the size of the output feature.
"""
graph = graph.local_var()
if isinstance(feat, tuple):
h_src = self.feat_drop(feat[0])
h_dst = self.feat_drop(feat[1])
feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
else:
h_src = h_dst = self.feat_drop(feat)
feat_src = feat_dst = self.fc(h_src).view(
-1, self._num_heads, self._out_feats)
# NOTE: GAT paper uses "first concatenation then linear projection"
# to compute attention scores, while ours is "first projection then
# addition", the two approaches are mathematically equivalent:
# We decompose the weight vector a mentioned in the paper into
# [a_l || a_r], then
# a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
# Our implementation is much efficient because we do not need to
# save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
# addition could be optimized with DGL's built-in function u_add_v,
# which further speeds up computation and saves memory footprint.
el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
graph.srcdata.update({'ft': feat_src, 'el': el})
graph.dstdata.update({'er': er})
# compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = self.leaky_relu(graph.edata.pop('e'))
# compute softmax
graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
# message passing
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft'))
rst = graph.dstdata['ft']
# residual
if self.res_fc is not None:
resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
rst = rst + resval
# activation
if self.activation:
rst = self.activation(rst)
return rst
with graph.local_scope():
if isinstance(feat, tuple):
h_src = self.feat_drop(feat[0])
h_dst = self.feat_drop(feat[1])
feat_src = self.fc_src(h_src).view(-1, self._num_heads, self._out_feats)
feat_dst = self.fc_dst(h_dst).view(-1, self._num_heads, self._out_feats)
else:
h_src = h_dst = self.feat_drop(feat)
feat_src = feat_dst = self.fc(h_src).view(
-1, self._num_heads, self._out_feats)
# NOTE: GAT paper uses "first concatenation then linear projection"
# to compute attention scores, while ours is "first projection then
# addition", the two approaches are mathematically equivalent:
# We decompose the weight vector a mentioned in the paper into
# [a_l || a_r], then
# a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
# Our implementation is much efficient because we do not need to
# save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
# addition could be optimized with DGL's built-in function u_add_v,
# which further speeds up computation and saves memory footprint.
el = (feat_src * self.attn_l).sum(dim=-1).unsqueeze(-1)
er = (feat_dst * self.attn_r).sum(dim=-1).unsqueeze(-1)
graph.srcdata.update({'ft': feat_src, 'el': el})
graph.dstdata.update({'er': er})
# compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = self.leaky_relu(graph.edata.pop('e'))
# compute softmax
graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
# message passing
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft'))
rst = graph.dstdata['ft']
# residual
if self.res_fc is not None:
resval = self.res_fc(h_dst).view(h_dst.shape[0], -1, self._out_feats)
rst = rst + resval
# activation
if self.activation:
rst = self.activation(rst)
return rst
......@@ -77,22 +77,22 @@ class GatedGraphConv(nn.Module):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the output feature size.
"""
assert graph.is_homograph(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
graph = graph.local_var()
zero_pad = feat.new_zeros((feat.shape[0], self._out_feats - feat.shape[1]))
feat = th.cat([feat, zero_pad], -1)
with graph.local_scope():
assert graph.is_homograph(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
zero_pad = feat.new_zeros((feat.shape[0], self._out_feats - feat.shape[1]))
feat = th.cat([feat, zero_pad], -1)
for _ in range(self._n_steps):
graph.ndata['h'] = feat
for i in range(self._n_etypes):
eids = (etypes == i).nonzero().view(-1)
if len(eids) > 0:
graph.apply_edges(
lambda edges: {'W_e*h': self.linears[i](edges.src['h'])},
eids
)
graph.update_all(fn.copy_e('W_e*h', 'm'), fn.sum('m', 'a'))
a = graph.ndata.pop('a') # (N, D)
feat = self.gru(a, feat)
return feat
for _ in range(self._n_steps):
graph.ndata['h'] = feat
for i in range(self._n_etypes):
eids = (etypes == i).nonzero().view(-1)
if len(eids) > 0:
graph.apply_edges(
lambda edges: {'W_e*h': self.linears[i](edges.src['h'])},
eids
)
graph.update_all(fn.copy_e('W_e*h', 'm'), fn.sum('m', 'a'))
a = graph.ndata.pop('a') # (N, D)
feat = self.gru(a, feat)
return feat
......@@ -72,11 +72,11 @@ class GINConv(nn.Module):
If ``apply_func`` is None, :math:`D_{out}` should be the same
as input dimensionality.
"""
graph = graph.local_var()
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_u('h', 'm'), self._reducer('m', 'neigh'))
rst = (1 + self.eps) * feat_dst + graph.dstdata['neigh']
if self.apply_func is not None:
rst = self.apply_func(rst)
return rst
with graph.local_scope():
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_u('h', 'm'), self._reducer('m', 'neigh'))
rst = (1 + self.eps) * feat_dst + graph.dstdata['neigh']
if self.apply_func is not None:
rst = self.apply_func(rst)
return rst
......@@ -125,57 +125,56 @@ class GraphConv(nn.Module):
torch.Tensor
The output feature
"""
graph = graph.local_var()
if self._norm == 'both':
degs = graph.out_degrees().to(feat.device).float().clamp(min=1)
norm = th.pow(degs, -0.5)
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp)
feat = feat * norm
if weight is not None:
if self.weight is not None:
raise DGLError('External weight is provided while at the same time the'
' module has defined its own weight parameter. Please'
' create the module with flag weight=False.')
else:
weight = self.weight
if self._in_feats > self._out_feats:
# mult W first to reduce the feature size for aggregation.
if weight is not None:
feat = th.matmul(feat, weight)
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
else:
# aggregate first then mult W
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
if weight is not None:
rst = th.matmul(rst, weight)
if self._norm != 'none':
degs = graph.in_degrees().to(feat.device).float().clamp(min=1)
with graph.local_scope():
if self._norm == 'both':
degs = graph.out_degrees().to(feat.device).float().clamp(min=1)
norm = th.pow(degs, -0.5)
else:
norm = 1.0 / degs
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp)
rst = rst * norm
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp)
feat = feat * norm
if self.bias is not None:
rst = rst + self.bias
if self._activation is not None:
rst = self._activation(rst)
return rst
if weight is not None:
if self.weight is not None:
raise DGLError('External weight is provided while at the same time the'
' module has defined its own weight parameter. Please'
' create the module with flag weight=False.')
else:
weight = self.weight
if self._in_feats > self._out_feats:
# mult W first to reduce the feature size for aggregation.
if weight is not None:
feat = th.matmul(feat, weight)
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
else:
# aggregate first then mult W
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
if weight is not None:
rst = th.matmul(rst, weight)
if self._norm != 'none':
degs = graph.in_degrees().to(feat.device).float().clamp(min=1)
if self._norm == 'both':
norm = th.pow(degs, -0.5)
else:
norm = 1.0 / degs
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp)
rst = rst * norm
if self.bias is not None:
rst = rst + self.bias
if self._activation is not None:
rst = self._activation(rst)
return rst
def extra_repr(self):
"""Set the extra representation of the module,
......
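
For reference, reading the GraphConv hunk above: the ``norm='both'`` branch scales the source features by the inverse square root of the out-degrees before aggregation and the aggregated result by the inverse square root of the in-degrees afterwards, so up to bias and activation (and with degrees clamped to at least 1) the forward pass computes the familiar symmetric GCN normalization:

.. math::

    \mathrm{rst} = D_{in}^{-1/2} \, A \, D_{out}^{-1/2} \, X \, W
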
......@@ -114,48 +114,47 @@ class SAGEConv(nn.Module):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the size of the output feature.
"""
graph = graph.local_var()
if isinstance(feat, tuple):
feat_src = self.feat_drop(feat[0])
feat_dst = self.feat_drop(feat[1])
else:
feat_src = feat_dst = self.feat_drop(feat)
h_self = feat_dst
if self._aggre_type == 'mean':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), fn.mean('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'gcn':
check_eq_shape(feat)
graph.srcdata['h'] = feat_src
graph.dstdata['h'] = feat_dst # same as above if homogeneous
graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'neigh'))
# divide in_degrees
degs = graph.in_degrees().to(feat_dst)
h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) / (degs.unsqueeze(-1) + 1)
elif self._aggre_type == 'pool':
graph.srcdata['h'] = F.relu(self.fc_pool(feat_src))
graph.update_all(fn.copy_src('h', 'm'), fn.max('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'lstm':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), self._lstm_reducer)
h_neigh = graph.dstdata['neigh']
else:
raise KeyError('Aggregator type {} not recognized.'.format(self._aggre_type))
# GraphSAGE GCN does not require fc_self.
if self._aggre_type == 'gcn':
rst = self.fc_neigh(h_neigh)
else:
rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
# activation
if self.activation is not None:
rst = self.activation(rst)
# normalization
if self.norm is not None:
rst = self.norm(rst)
return rst
with graph.local_scope():
if isinstance(feat, tuple):
feat_src = self.feat_drop(feat[0])
feat_dst = self.feat_drop(feat[1])
else:
feat_src = feat_dst = self.feat_drop(feat)
h_self = feat_dst
if self._aggre_type == 'mean':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), fn.mean('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'gcn':
check_eq_shape(feat)
graph.srcdata['h'] = feat_src
graph.dstdata['h'] = feat_dst # same as above if homogeneous
graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'neigh'))
# divide in_degrees
degs = graph.in_degrees().to(feat_dst)
h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']) / (degs.unsqueeze(-1) + 1)
elif self._aggre_type == 'pool':
graph.srcdata['h'] = F.relu(self.fc_pool(feat_src))
graph.update_all(fn.copy_src('h', 'm'), fn.max('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'lstm':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), self._lstm_reducer)
h_neigh = graph.dstdata['neigh']
else:
raise KeyError('Aggregator type {} not recognized.'.format(self._aggre_type))
# GraphSAGE GCN does not require fc_self.
if self._aggre_type == 'gcn':
rst = self.fc_neigh(h_neigh)
else:
rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
# activation
if self.activation is not None:
rst = self.activation(rst)
# normalization
if self.norm is not None:
rst = self.norm(rst)
return rst
......@@ -77,27 +77,27 @@ class SGConv(nn.Module):
If ``cache`` is set to True, ``feat`` and ``graph`` should not change during
training, or you will get wrong results.
"""
graph = graph.local_var()
if self._cached_h is not None:
feat = self._cached_h
else:
# compute normalization
degs = graph.in_degrees().float().clamp(min=1)
norm = th.pow(degs, -0.5)
norm = norm.to(feat.device).unsqueeze(1)
# compute (D^-1/2 A D^-1/2)^k X
for _ in range(self._k):
feat = feat * norm
graph.ndata['h'] = feat
graph.update_all(fn.copy_u('h', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
feat = feat * norm
with graph.local_scope():
if self._cached_h is not None:
feat = self._cached_h
else:
# compute normalization
degs = graph.in_degrees().float().clamp(min=1)
norm = th.pow(degs, -0.5)
norm = norm.to(feat.device).unsqueeze(1)
# compute (D^-1/2 A D^-1/2)^k X
for _ in range(self._k):
feat = feat * norm
graph.ndata['h'] = feat
graph.update_all(fn.copy_u('h', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
feat = feat * norm
if self.norm is not None:
feat = self.norm(feat)
if self.norm is not None:
feat = self.norm(feat)
# cache feature
if self._cached:
self._cached_h = feat
return self.fc(feat)
# cache feature
if self._cached:
self._cached_h = feat
return self.fc(feat)
......@@ -73,29 +73,29 @@ class TAGConv(nn.Module):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the size of the output feature.
"""
assert graph.is_homograph(), 'Graph is not homogeneous'
graph = graph.local_var()
with graph.local_scope():
assert graph.is_homograph(), 'Graph is not homogeneous'
norm = th.pow(graph.in_degrees().float().clamp(min=1), -0.5)
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp).to(feat.device)
norm = th.pow(graph.in_degrees().float().clamp(min=1), -0.5)
shp = norm.shape + (1,) * (feat.dim() - 1)
norm = th.reshape(norm, shp).to(feat.device)
# D^-1/2 A D^-1/2 X
fstack = [feat]
for _ in range(self._k):
# D^-1/2 A D^-1/2 X
fstack = [feat]
for _ in range(self._k):
rst = fstack[-1] * norm
graph.ndata['h'] = rst
rst = fstack[-1] * norm
graph.ndata['h'] = rst
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.ndata['h']
rst = rst * norm
fstack.append(rst)
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.ndata['h']
rst = rst * norm
fstack.append(rst)
rst = self.lin(th.cat(fstack, dim=-1))
rst = self.lin(th.cat(fstack, dim=-1))
if self._activation is not None:
rst = self._activation(rst)
if self._activation is not None:
rst = self._activation(rst)
return rst
return rst
......@@ -130,13 +130,13 @@ class Sequential(nn.Sequential):
>>> def __init__(self):
>>> super().__init__()
>>> def forward(self, graph, n_feat, e_feat):
>>> graph = graph.local_var()
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
>>> e_feat += graph.edata['e']
>>> return n_feat, e_feat
>>> with graph.local_scope():
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
>>> e_feat += graph.edata['e']
>>> return n_feat, e_feat
>>>
>>> g = dgl.DGLGraph()
>>> g.add_nodes(3)
......@@ -169,11 +169,11 @@ class Sequential(nn.Sequential):
>>> def __init__(self):
>>> super().__init__()
>>> def forward(self, graph, n_feat):
>>> graph = graph.local_var()
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> return n_feat.view(graph.number_of_nodes() // 2, 2, -1).sum(1)
>>> with graph.local_scope():
>>> graph.ndata['h'] = n_feat
>>> graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
>>> n_feat += graph.ndata['h']
>>> return n_feat.view(graph.number_of_nodes() // 2, 2, -1).sum(1)
>>>
>>> g1 = dgl.DGLGraph(nx.erdos_renyi_graph(32, 0.05))
>>> g2 = dgl.DGLGraph(nx.erdos_renyi_graph(16, 0.2))
......
......@@ -55,23 +55,23 @@ class APPNPConv(layers.Layer):
The output feature of shape :math:`(N, *)` where :math:`*`
should be the same as input shape.
"""
graph = graph.local_var()
degs = tf.clip_by_value(tf.cast(graph.in_degrees(), tf.float32),
clip_value_min=1, clip_value_max=np.inf)
norm = tf.pow(degs, -0.5)
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = tf.reshape(norm, shp)
feat_0 = feat
for _ in range(self._k):
# normalization by src node
feat = feat * norm
graph.ndata['h'] = feat
graph.edata['w'] = self.edge_drop(
tf.ones(graph.number_of_edges(), 1))
graph.update_all(fn.u_mul_e('h', 'w', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
# normalization by dst node
feat = feat * norm
feat = (1 - self._alpha) * feat + self._alpha * feat_0
return feat
with graph.local_scope():
degs = tf.clip_by_value(tf.cast(graph.in_degrees(), tf.float32),
clip_value_min=1, clip_value_max=np.inf)
norm = tf.pow(degs, -0.5)
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = tf.reshape(norm, shp)
feat_0 = feat
for _ in range(self._k):
# normalization by src node
feat = feat * norm
graph.ndata['h'] = feat
graph.edata['w'] = self.edge_drop(
tf.ones(graph.number_of_edges(), 1))
graph.update_all(fn.u_mul_e('h', 'w', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
# normalization by dst node
feat = feat * norm
feat = (1 - self._alpha) * feat + self._alpha * feat_0
return feat
......@@ -112,45 +112,45 @@ class GATConv(layers.Layer):
The output feature of shape :math:`(N, H, D_{out})` where :math:`H`
is the number of heads, and :math:`D_{out}` is the size of the output feature.
"""
graph = graph.local_var()
if isinstance(feat, tuple):
h_src = self.feat_drop(feat[0])
h_dst = self.feat_drop(feat[1])
feat_src = tf.reshape(self.fc_src(h_src), (-1, self._num_heads, self._out_feats))
feat_dst = tf.reshape(self.fc_dst(h_dst), (-1, self._num_heads, self._out_feats))
else:
h_src = h_dst = self.feat_drop(feat)
feat_src = feat_dst = tf.reshape(
self.fc(h_src), (-1, self._num_heads, self._out_feats))
# NOTE: GAT paper uses "first concatenation then linear projection"
# to compute attention scores, while ours is "first projection then
# addition", the two approaches are mathematically equivalent:
# We decompose the weight vector a mentioned in the paper into
# [a_l || a_r], then
# a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
# Our implementation is much efficient because we do not need to
# save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
# addition could be optimized with DGL's built-in function u_add_v,
# which further speeds up computation and saves memory footprint.
el = tf.reduce_sum(feat_src * self.attn_l, axis=-1, keepdims=True)
er = tf.reduce_sum(feat_dst * self.attn_r, axis=-1, keepdims=True)
graph.srcdata.update({'ft': feat_src, 'el': el})
graph.dstdata.update({'er': er})
# compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = self.leaky_relu(graph.edata.pop('e'))
# compute softmax
graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
# message passing
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft'))
rst = graph.dstdata['ft']
# residual
if self.res_fc is not None:
resval = tf.reshape(self.res_fc(
h_dst), (h_dst.shape[0], -1, self._out_feats))
rst = rst + resval
# activation
if self.activation:
rst = self.activation(rst)
return rst
with graph.local_scope():
if isinstance(feat, tuple):
h_src = self.feat_drop(feat[0])
h_dst = self.feat_drop(feat[1])
feat_src = tf.reshape(self.fc_src(h_src), (-1, self._num_heads, self._out_feats))
feat_dst = tf.reshape(self.fc_dst(h_dst), (-1, self._num_heads, self._out_feats))
else:
h_src = h_dst = self.feat_drop(feat)
feat_src = feat_dst = tf.reshape(
self.fc(h_src), (-1, self._num_heads, self._out_feats))
# NOTE: GAT paper uses "first concatenation then linear projection"
# to compute attention scores, while ours is "first projection then
# addition", the two approaches are mathematically equivalent:
# We decompose the weight vector a mentioned in the paper into
# [a_l || a_r], then
# a^T [Wh_i || Wh_j] = a_l Wh_i + a_r Wh_j
# Our implementation is much efficient because we do not need to
# save [Wh_i || Wh_j] on edges, which is not memory-efficient. Plus,
# addition could be optimized with DGL's built-in function u_add_v,
# which further speeds up computation and saves memory footprint.
el = tf.reduce_sum(feat_src * self.attn_l, axis=-1, keepdims=True)
er = tf.reduce_sum(feat_dst * self.attn_r, axis=-1, keepdims=True)
graph.srcdata.update({'ft': feat_src, 'el': el})
graph.dstdata.update({'er': er})
# compute edge attention, el and er are a_l Wh_i and a_r Wh_j respectively.
graph.apply_edges(fn.u_add_v('el', 'er', 'e'))
e = self.leaky_relu(graph.edata.pop('e'))
# compute softmax
graph.edata['a'] = self.attn_drop(edge_softmax(graph, e))
# message passing
graph.update_all(fn.u_mul_e('ft', 'a', 'm'),
fn.sum('m', 'ft'))
rst = graph.dstdata['ft']
# residual
if self.res_fc is not None:
resval = tf.reshape(self.res_fc(
h_dst), (h_dst.shape[0], -1, self._out_feats))
rst = rst + resval
# activation
if self.activation:
rst = self.activation(rst)
return rst
......@@ -70,11 +70,11 @@ class GINConv(layers.Layer):
If ``apply_func`` is None, :math:`D_{out}` should be the same
as input dimensionality.
"""
graph = graph.local_var()
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_u('h', 'm'), self._reducer('m', 'neigh'))
rst = (1 + self.eps) * feat_dst + graph.dstdata['neigh']
if self.apply_func is not None:
rst = self.apply_func(rst)
return rst
with graph.local_scope():
feat_src, feat_dst = expand_as_pair(feat)
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_u('h', 'm'), self._reducer('m', 'neigh'))
rst = (1 + self.eps) * feat_dst + graph.dstdata['neigh']
if self.apply_func is not None:
rst = self.apply_func(rst)
return rst
......@@ -122,61 +122,60 @@ class GraphConv(layers.Layer):
tf.Tensor
The output feature
"""
graph = graph.local_var()
if self._norm == 'both':
degs = tf.clip_by_value(tf.cast(graph.out_degrees(), tf.float32),
clip_value_min=1,
clip_value_max=np.inf)
norm = tf.pow(degs, -0.5)
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = tf.reshape(norm, shp)
feat = feat * norm
if weight is not None:
if self.weight is not None:
raise DGLError('External weight is provided while at the same time the'
' module has defined its own weight parameter. Please'
' create the module with flag weight=False.')
else:
weight = self.weight
if self._in_feats > self._out_feats:
# mult W first to reduce the feature size for aggregation.
if weight is not None:
feat = tf.matmul(feat, weight)
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
else:
# aggregate first then mult W
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
if weight is not None:
rst = tf.matmul(rst, weight)
if self._norm != 'none':
degs = tf.clip_by_value(tf.cast(graph.in_degrees(), tf.float32),
clip_value_min=1,
clip_value_max=np.inf)
with graph.local_scope():
if self._norm == 'both':
degs = tf.clip_by_value(tf.cast(graph.out_degrees(), tf.float32),
clip_value_min=1,
clip_value_max=np.inf)
norm = tf.pow(degs, -0.5)
else:
norm = 1.0 / degs
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = tf.reshape(norm, shp)
rst = rst * norm
if self.bias is not None:
rst = rst + self.bias
if self._activation is not None:
rst = self._activation(rst)
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = tf.reshape(norm, shp)
feat = feat * norm
return rst
if weight is not None:
if self.weight is not None:
raise DGLError('External weight is provided while at the same time the'
' module has defined its own weight parameter. Please'
' create the module with flag weight=False.')
else:
weight = self.weight
if self._in_feats > self._out_feats:
# mult W first to reduce the feature size for aggregation.
if weight is not None:
feat = tf.matmul(feat, weight)
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
else:
# aggregate first then mult W
graph.srcdata['h'] = feat
graph.update_all(fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'))
rst = graph.dstdata['h']
if weight is not None:
rst = tf.matmul(rst, weight)
if self._norm != 'none':
degs = tf.clip_by_value(tf.cast(graph.in_degrees(), tf.float32),
clip_value_min=1,
clip_value_max=np.inf)
if self._norm == 'both':
norm = tf.pow(degs, -0.5)
else:
norm = 1.0 / degs
shp = norm.shape + (1,) * (feat.ndim - 1)
norm = tf.reshape(norm, shp)
rst = rst * norm
if self.bias is not None:
rst = rst + self.bias
if self._activation is not None:
rst = self._activation(rst)
return rst
def extra_repr(self):
"""Set the extra representation of the module,
......
......@@ -100,49 +100,48 @@ class SAGEConv(layers.Layer):
The output feature of shape :math:`(N, D_{out})` where :math:`D_{out}`
is the size of the output feature.
"""
graph = graph.local_var()
if isinstance(feat, tuple):
feat_src = self.feat_drop(feat[0])
feat_dst = self.feat_drop(feat[1])
else:
feat_src = feat_dst = self.feat_drop(feat)
h_self = feat_dst
if self._aggre_type == 'mean':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), fn.mean('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'gcn':
check_eq_shape(feat)
graph.srcdata['h'] = feat_src
graph.dstdata['h'] = feat_dst # same as above if homogeneous
graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'neigh'))
# divide in_degrees
degs = tf.cast(graph.in_degrees(), tf.float32)
h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']
) / (tf.expand_dims(degs, -1) + 1)
elif self._aggre_type == 'pool':
graph.srcdata['h'] = tf.nn.relu(self.fc_pool(feat_src))
graph.update_all(fn.copy_src('h', 'm'), fn.max('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'lstm':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), self._lstm_reducer)
h_neigh = graph.dstdata['neigh']
else:
raise KeyError(
'Aggregator type {} not recognized.'.format(self._aggre_type))
# GraphSAGE GCN does not require fc_self.
if self._aggre_type == 'gcn':
rst = self.fc_neigh(h_neigh)
else:
rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
# activation
if self.activation is not None:
rst = self.activation(rst)
# normalization
if self.norm is not None:
rst = self.norm(rst)
return rst
with graph.local_scope():
if isinstance(feat, tuple):
feat_src = self.feat_drop(feat[0])
feat_dst = self.feat_drop(feat[1])
else:
feat_src = feat_dst = self.feat_drop(feat)
h_self = feat_dst
if self._aggre_type == 'mean':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), fn.mean('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'gcn':
check_eq_shape(feat)
graph.srcdata['h'] = feat_src
graph.dstdata['h'] = feat_dst # same as above if homogeneous
graph.update_all(fn.copy_src('h', 'm'), fn.sum('m', 'neigh'))
# divide in_degrees
degs = tf.cast(graph.in_degrees(), tf.float32)
h_neigh = (graph.dstdata['neigh'] + graph.dstdata['h']
) / (tf.expand_dims(degs, -1) + 1)
elif self._aggre_type == 'pool':
graph.srcdata['h'] = tf.nn.relu(self.fc_pool(feat_src))
graph.update_all(fn.copy_src('h', 'm'), fn.max('m', 'neigh'))
h_neigh = graph.dstdata['neigh']
elif self._aggre_type == 'lstm':
graph.srcdata['h'] = feat_src
graph.update_all(fn.copy_src('h', 'm'), self._lstm_reducer)
h_neigh = graph.dstdata['neigh']
else:
raise KeyError(
'Aggregator type {} not recognized.'.format(self._aggre_type))
# GraphSAGE GCN does not require fc_self.
if self._aggre_type == 'gcn':
rst = self.fc_neigh(h_neigh)
else:
rst = self.fc_self(h_self) + self.fc_neigh(h_neigh)
# activation
if self.activation is not None:
rst = self.activation(rst)
# normalization
if self.norm is not None:
rst = self.norm(rst)
return rst
......@@ -72,28 +72,28 @@ class SGConv(layers.Layer):
If ``cache`` is set to True, ``feat`` and ``graph`` should not change during
training, or you will get wrong results.
"""
graph = graph.local_var()
if self._cached_h is not None:
feat = self._cached_h
else:
# compute normalization
degs = tf.clip_by_value(tf.cast(
graph.in_degrees(), tf.float32), clip_value_min=1, clip_value_max=np.inf)
norm = tf.pow(degs, -0.5)
norm = tf.expand_dims(norm, 1)
# compute (D^-1/2 A D^-1/2)^k X
for _ in range(self._k):
feat = feat * norm
graph.ndata['h'] = feat
graph.update_all(fn.copy_u('h', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
feat = feat * norm
with graph.local_scope():
if self._cached_h is not None:
feat = self._cached_h
else:
# compute normalization
degs = tf.clip_by_value(tf.cast(
graph.in_degrees(), tf.float32), clip_value_min=1, clip_value_max=np.inf)
norm = tf.pow(degs, -0.5)
norm = tf.expand_dims(norm, 1)
# compute (D^-1/2 A D^-1/2)^k X
for _ in range(self._k):
feat = feat * norm
graph.ndata['h'] = feat
graph.update_all(fn.copy_u('h', 'm'),
fn.sum('m', 'h'))
feat = graph.ndata.pop('h')
feat = feat * norm
if self.norm is not None:
feat = self.norm(feat)
if self.norm is not None:
feat = self.norm(feat)
# cache feature
if self._cached:
self._cached_h = feat
return self.fc(feat)
# cache feature
if self._cached:
self._cached_h = feat
return self.fc(feat)
......@@ -12,24 +12,24 @@ def edge_softmax_real(graph, score, eids=ALL):
"""Edge Softmax function"""
if not is_all(eids):
graph = graph.edge_subgraph(tf.cast(eids, tf.int64))
g = graph.local_var()
g.edata['s'] = score
g.update_all(fn.copy_e('s', 'm'), fn.max('m', 'smax'))
g.apply_edges(fn.e_sub_v('s', 'smax', 'out'))
g.edata['out'] = tf.math.exp(g.edata['out'])
g.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
g.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
out = g.edata['out']
with graph.local_scope():
graph.edata['s'] = score
graph.update_all(fn.copy_e('s', 'm'), fn.max('m', 'smax'))
graph.apply_edges(fn.e_sub_v('s', 'smax', 'out'))
graph.edata['out'] = tf.math.exp(graph.edata['out'])
graph.update_all(fn.copy_e('out', 'm'), fn.sum('m', 'out_sum'))
graph.apply_edges(fn.e_div_v('out', 'out_sum', 'out'))
out = graph.edata['out']
def edge_softmax_backward(grad_out):
g = graph.local_var()
# clear backward cache explicitly
g.edata['out'] = out
g.edata['grad_s'] = out * grad_out
g.update_all(fn.copy_e('grad_s', 'm'), fn.sum('m', 'accum'))
g.apply_edges(fn.e_mul_v('out', 'accum', 'out'))
grad_score = g.edata['grad_s'] - g.edata['out']
return grad_score
with graph.local_scope():
# clear backward cache explicitly
graph.edata['out'] = out
graph.edata['grad_s'] = out * grad_out
graph.update_all(fn.copy_e('grad_s', 'm'), fn.sum('m', 'accum'))
graph.apply_edges(fn.e_mul_v('out', 'accum', 'out'))
grad_score = graph.edata['grad_s'] - graph.edata['out']
return grad_score
return out, edge_softmax_backward
......
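
As an aside, the (out, edge_softmax_backward) pair returned by edge_softmax_real above follows TensorFlow's custom-gradient convention. A rough, self-contained sketch of that registration pattern, using a toy op rather than DGL's actual wrapper code:

import tensorflow as tf

@tf.custom_gradient
def square_op(x):
    # forward pass
    y = x * x

    def grad(dy):
        # dy is the upstream gradient; return dL/dx = 2x * dy
        return 2.0 * x * dy

    # return the output together with its hand-written backward function,
    # mirroring the (out, edge_softmax_backward) structure above
    return y, grad

x = tf.constant([1.0, 2.0, 3.0])
with tf.GradientTape() as tape:
    tape.watch(x)
    y = square_op(x)
print(tape.gradient(y, x))  # -> [2. 4. 6.]
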