"...text-generation-inference.git" did not exist on "bb9e670ac51d520a2f0baca2348222aea338242a"
Unverified commit 2cdc4d3c authored by Minjie Wang, committed by GitHub

[Doc] Patch tutorial (#1380)

* patched 1_first

* done 2_basics

* done 4_batch

* done 1_gcn, 9_gat, 2_capsule

* 4_rgcn.py

* revert

* more fix
parent 0f40c6e4
@@ -45,32 +45,26 @@ At the end of this tutorial, we hope you get a brief feeling of how DGL works.
# Create the graph for Zachary's karate club as follows:

import dgl
+import numpy as np

def build_karate_club_graph():
-    g = dgl.DGLGraph()
-    # add 34 nodes into the graph; nodes are labeled from 0~33
-    g.add_nodes(34)
-    # all 78 edges as a list of tuples
-    edge_list = [(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2),
-                 (4, 0), (5, 0), (6, 0), (6, 4), (6, 5), (7, 0), (7, 1),
-                 (7, 2), (7, 3), (8, 0), (8, 2), (9, 2), (10, 0), (10, 4),
-                 (10, 5), (11, 0), (12, 0), (12, 3), (13, 0), (13, 1), (13, 2),
-                 (13, 3), (16, 5), (16, 6), (17, 0), (17, 1), (19, 0), (19, 1),
-                 (21, 0), (21, 1), (25, 23), (25, 24), (27, 2), (27, 23),
-                 (27, 24), (28, 2), (29, 23), (29, 26), (30, 1), (30, 8),
-                 (31, 0), (31, 24), (31, 25), (31, 28), (32, 2), (32, 8),
-                 (32, 14), (32, 15), (32, 18), (32, 20), (32, 22), (32, 23),
-                 (32, 29), (32, 30), (32, 31), (33, 8), (33, 9), (33, 13),
-                 (33, 14), (33, 15), (33, 18), (33, 19), (33, 20), (33, 22),
-                 (33, 23), (33, 26), (33, 27), (33, 28), (33, 29), (33, 30),
-                 (33, 31), (33, 32)]
-    # add edges two lists of nodes: src and dst
-    src, dst = tuple(zip(*edge_list))
-    g.add_edges(src, dst)
-    # edges are directional in DGL; make them bi-directional
-    g.add_edges(dst, src)
-    return g
+    # All 78 edges are stored in two numpy arrays. One for source endpoints
+    # while the other for destination endpoints.
+    src = np.array([1, 2, 2, 3, 3, 3, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 9, 10, 10,
+                    10, 11, 12, 12, 13, 13, 13, 13, 16, 16, 17, 17, 19, 19, 21, 21,
+                    25, 25, 27, 27, 27, 28, 29, 29, 30, 30, 31, 31, 31, 31, 32, 32,
+                    32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, 33, 33, 33, 33,
+                    33, 33, 33, 33, 33, 33, 33, 33, 33, 33])
+    dst = np.array([0, 0, 1, 0, 1, 2, 0, 0, 0, 4, 5, 0, 1, 2, 3, 0, 2, 2, 0, 4,
+                    5, 0, 0, 3, 0, 1, 2, 3, 5, 6, 0, 1, 0, 1, 0, 1, 23, 24, 2, 23,
+                    24, 2, 23, 26, 1, 8, 0, 24, 25, 28, 2, 8, 14, 15, 18, 20, 22, 23,
+                    29, 30, 31, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30,
+                    31, 32])
+    # Edges are directional in DGL; make them bi-directional.
+    u = np.concatenate([src, dst])
+    v = np.concatenate([dst, src])
+    # Construct a DGLGraph
+    return dgl.DGLGraph((u, v))

###############################################################################
# Print out the number of nodes and edges in our newly constructed graph:
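For reference, a quick check of the constructed graph would look like the following. This is a sketch only, not part of the diff; the exact print statements are illustrative.

# Illustrative sketch: build the graph and report its size.
G = build_karate_club_graph()
print('We have %d nodes.' % G.number_of_nodes())   # 34 nodes
print('We have %d edges.' % G.number_of_edges())   # 156 edges (78 edges made bi-directional)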
@@ -95,27 +89,28 @@ nx.draw(nx_G, pos, with_labels=True, node_color=[[.7, .7, .7]])
# Step 2: Assign features to nodes or edges
# --------------------------------------------
# Graph neural networks associate features with nodes and edges for training.
-# For our classification example, we assign each node an input feature as a one-hot vector:
-# node :math:`v_i`'s feature vector is :math:`[0,\ldots,1,\dots,0]`,
-# where the :math:`i^{th}` position is one.
+# For our classification example, since there is no input feature, we assign each node
+# with a learnable embedding vector.
#
# In DGL, you can add features for all nodes at once, using a feature tensor that
-# batches node features along the first dimension. The code below adds the one-hot
-# feature for all nodes:
+# batches node features along the first dimension. The code below adds the learnable
+# embeddings for all nodes:

import torch
+import torch.nn as nn
+import torch.nn.functional as F

-G.ndata['feat'] = torch.eye(34)
+embed = nn.Embedding(34, 5)  # 34 nodes with embedding dim equal to 5
+G.ndata['feat'] = embed.weight

###############################################################################
# Print out the node features to verify:

# print out node 2's input feature
-print(G.nodes[2].data['feat'])
+print(G.ndata['feat'][2])

# print out node 10 and 11's input features
-print(G.nodes[[10, 11]].data['feat'])
+print(G.ndata['feat'][[10, 11]])

###############################################################################
# Step 3: Define a Graph Convolutional Network (GCN)
@@ -139,74 +134,41 @@ print(G.nodes[[10, 11]].data['feat'])
#    :alt: mailbox
#    :align: center
#
-# Now, we show that the GCN layer can be easily implemented in DGL.
-
-import torch.nn as nn
-import torch.nn.functional as F
-
-# Define the message and reduce function
-# NOTE: We ignore the GCN's normalization constant c_ij for this tutorial.
-def gcn_message(edges):
-    # The argument is a batch of edges.
-    # This computes a (batch of) message called 'msg' using the source node's feature 'h'.
-    return {'msg' : edges.src['h']}
-
-def gcn_reduce(nodes):
-    # The argument is a batch of nodes.
-    # This computes the new 'h' features by summing received 'msg' in each node's mailbox.
-    return {'h' : torch.sum(nodes.mailbox['msg'], dim=1)}
-
-# Define the GCNLayer module
-class GCNLayer(nn.Module):
-    def __init__(self, in_feats, out_feats):
-        super(GCNLayer, self).__init__()
-        self.linear = nn.Linear(in_feats, out_feats)
-
-    def forward(self, g, inputs):
-        # g is the graph and the inputs is the input node features
-        # first set the node features
-        g.ndata['h'] = inputs
-        # trigger message passing on all edges
-        g.send(g.edges(), gcn_message)
-        # trigger aggregation at all nodes
-        g.recv(g.nodes(), gcn_reduce)
-        # get the result node features
-        h = g.ndata.pop('h')
-        # perform linear transformation
-        return self.linear(h)
+# In DGL, we provide implementations of popular Graph Neural Network layers under
+# the `dgl.<backend>.nn` subpackage. The :class:`~dgl.nn.pytorch.GraphConv` module
+# implements one Graph Convolutional layer.
+
+from dgl.nn.pytorch import GraphConv

###############################################################################
-# In general, the nodes send information computed via the *message functions*,
-# and aggregate incoming information with the *reduce functions*.
-#
# Define a deeper GCN model that contains two GCN layers:

-# Define a 2-layer GCN model
class GCN(nn.Module):
    def __init__(self, in_feats, hidden_size, num_classes):
        super(GCN, self).__init__()
-        self.gcn1 = GCNLayer(in_feats, hidden_size)
-        self.gcn2 = GCNLayer(hidden_size, num_classes)
+        self.conv1 = GraphConv(in_feats, hidden_size)
+        self.conv2 = GraphConv(hidden_size, num_classes)

    def forward(self, g, inputs):
-        h = self.gcn1(g, inputs)
+        h = self.conv1(g, inputs)
        h = torch.relu(h)
-        h = self.gcn2(g, h)
+        h = self.conv2(g, h)
        return h

-# The first layer transforms input features of size of 34 to a hidden size of 5.
+# The first layer transforms input features of size of 5 to a hidden size of 5.
# The second layer transforms the hidden layer and produces output features of
# size 2, corresponding to the two groups of the karate club.
-net = GCN(34, 5, 2)
+net = GCN(5, 5, 2)
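A minimal sanity check of the patched model (a sketch, not part of the diff; it assumes the graph ``G`` and the embedding table ``embed`` created in the earlier steps):

# Illustrative sketch: one forward pass of the untrained 2-layer GCN.
logits = net(G, embed.weight)
print(logits.shape)  # torch.Size([34, 2]): one 2-class score vector per node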
###############################################################################
# Step 4: Data preparation and initialization
# -------------------------------------------
#
-# We use one-hot vectors to initialize the node features. Since this is a
+# We use learnable embeddings to initialize the node features. Since this is a
# semi-supervised setting, only the instructor (node 0) and the club president
# (node 33) are assigned labels. The implementation is available as follows.

-inputs = torch.eye(34)
+inputs = embed.weight
labeled_nodes = torch.tensor([0, 33])  # only the instructor and the president nodes are labeled
labels = torch.tensor([0, 1])  # their labels are different
@@ -216,10 +178,11 @@ labels = torch.tensor([0, 1])  # their labels are different
# The training loop is exactly the same as other PyTorch models.
# We (1) create an optimizer, (2) feed the inputs to the model,
# (3) calculate the loss and (4) use autograd to optimize the model.
+import itertools

-optimizer = torch.optim.Adam(net.parameters(), lr=0.01)
+optimizer = torch.optim.Adam(itertools.chain(net.parameters(), embed.parameters()), lr=0.01)
all_logits = []
-for epoch in range(30):
+for epoch in range(50):
    logits = net(G, inputs)
    # we save the logits for visualization later
    all_logits.append(logits.detach())
......
@@ -33,18 +33,50 @@ plt.show()
###############################################################################
-# The examples here show the same graph, except that :class:`DGLGraph` is always directional.
+# There are many ways to construct a :class:`DGLGraph`. Below are the allowed
+# data types ordered by our recommendation.
#
-# You can also create a graph by calling the DGL interface.
+# * A pair of arrays ``(u, v)`` storing the source and destination nodes respectively.
+#   They can be numpy arrays or tensor objects from the backend framework.
+# * ``scipy`` sparse matrix representing the adjacency matrix of the graph to be
+#   constructed.
+# * ``networkx`` graph object.
+# * A list of edges in the form of integer pairs.
#
-# In the next example, you build a star graph. :class:`DGLGraph` nodes are a consecutive range of
-# integers between 0 and :func:`number_of_nodes() <DGLGraph.number_of_nodes>`
-# and can grow by calling :func:`add_nodes <DGLGraph.add_nodes>`.
+# The examples below construct the same star graph via different methods.
+#
+# :class:`DGLGraph` nodes are a consecutive range of integers between 0 and
+# :func:`number_of_nodes() <DGLGraph.number_of_nodes>`.
# :class:`DGLGraph` edges are in order of their additions. Note that
-# edges are accessed in much the same way as nodes, with one extra feature: *edge broadcasting*.
+# edges are accessed in much the same way as nodes, with one extra feature:
+# *edge broadcasting*.

+import dgl
import torch as th
+import numpy as np
+import scipy.sparse as spp
+
+# Create a star graph from a pair of arrays (using ``numpy.array`` works too).
+u = th.tensor([0, 0, 0, 0, 0])
+v = th.tensor([1, 2, 3, 4, 5])
+star1 = dgl.DGLGraph((u, v))
+
+# Create the same graph in one go! Essentially, if one of the arrays is a scalar,
+# the value is automatically broadcasted to match the length of the other array
+# -- a feature called *edge broadcasting*.
+star2 = dgl.DGLGraph((0, v))
+
+# Create the same graph from a scipy sparse matrix (using ``scipy.sparse.csr_matrix`` works too).
+adj = spp.coo_matrix((np.ones(len(u)), (u.numpy(), v.numpy())))
+star3 = dgl.DGLGraph(adj)
+
+# Create the same graph from a list of integer pairs.
+elist = [(0, 1), (0, 2), (0, 3), (0, 4), (0, 5)]
+star4 = dgl.DGLGraph(elist)
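As a quick check (illustrative only, not part of the patch), the star built from the first pair of arrays has node 0 at the center pointing to nodes 1 through 5; the other constructions should describe the same graph.

# Illustrative sketch: inspect the first star graph.
print(star1.number_of_nodes(), star1.number_of_edges())  # 6 5
print(star1.edges())  # (tensor([0, 0, 0, 0, 0]), tensor([1, 2, 3, 4, 5]))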
###############################################################################
+# You can also create a graph by progressively adding more nodes and edges.
+# Although it is not as efficient as the above constructors, it is suitable
+# for applications where the graph cannot be constructed in one shot.

g = dgl.DGLGraph()
g.add_nodes(10)
@@ -63,12 +95,10 @@ g.clear(); g.add_nodes(10)
src = th.tensor(list(range(1, 10)));
g.add_edges(src, 0)

-import networkx as nx
-import matplotlib.pyplot as plt
+# Visualize the graph.
nx.draw(g.to_networkx(), with_labels=True)
plt.show()

###############################################################################
# Assigning a feature
# -------------------
@@ -89,19 +119,14 @@ import torch as th
x = th.randn(10, 3)
g.ndata['x'] = x

###############################################################################
-# :func:`ndata <DGLGraph.ndata>` is a syntax sugar to access the state of all nodes.
-# States are stored
-# in a container ``data`` that hosts a user-defined dictionary.
+# :func:`ndata <DGLGraph.ndata>` is a syntax sugar to access the feature
+# data of all nodes. To get the features of some particular nodes, slice out
+# the corresponding rows.

-print(g.ndata['x'] == g.nodes[:].data['x'])
-
-# Access node set with integer, list, or integer tensor
-g.nodes[0].data['x'] = th.zeros(1, 3)
-g.nodes[[0, 1, 2]].data['x'] = th.zeros(3, 3)
-g.nodes[th.tensor([0, 1, 2])].data['x'] = th.zeros(3, 3)
+g.ndata['x'][0] = th.zeros(1, 3)
+g.ndata['x'][[0, 1, 2]] = th.zeros(3, 3)
+g.ndata['x'][th.tensor([0, 1, 2])] = th.randn((3, 3))

###############################################################################
# Assigning edge features is similar to that of node features,
@@ -110,14 +135,15 @@ g.nodes[th.tensor([0, 1, 2])].data['x'] = th.zeros(3, 3)
g.edata['w'] = th.randn(9, 2)

# Access edge set with IDs in integer, list, or integer tensor
-g.edges[1].data['w'] = th.randn(1, 2)
-g.edges[[0, 1, 2]].data['w'] = th.zeros(3, 2)
-g.edges[th.tensor([0, 1, 2])].data['w'] = th.zeros(3, 2)
+g.edata['w'][1] = th.randn(1, 2)
+g.edata['w'][[0, 1, 2]] = th.zeros(3, 2)
+g.edata['w'][th.tensor([0, 1, 2])] = th.zeros(3, 2)

-# You can also access the edges by giving endpoints
-g.edges[1, 0].data['w'] = th.ones(1, 2)                  # edge 1 -> 0
-g.edges[[1, 2, 3], [0, 0, 0]].data['w'] = th.ones(3, 2)  # edges [1, 2, 3] -> 0
+# You can get the edge ids by giving endpoints, which are useful for accessing the features.
+g.edata['w'][g.edge_id(1, 0)] = th.ones(1, 2)                   # edge 1 -> 0
+g.edata['w'][g.edge_ids([1, 2, 3], [0, 0, 0])] = th.ones(3, 2)  # edges [1, 2, 3] -> 0
+# Use edge broadcasting whenever applicable.
+g.edata['w'][g.edge_ids([1, 2, 3], 0)] = th.ones(3, 2)          # edges [1, 2, 3] -> 0

###############################################################################
# After assignments, each node or edge field will be associated with a scheme
@@ -170,7 +196,6 @@ print(g_multi.edata['w'])
# * Updating a feature of different schemes raises the risk of error on individual nodes (or
#   node subset).
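For instance (an illustrative sketch, not part of the patch), you can inspect the scheme that a field was registered with, and an assignment whose per-node shape disagrees with it will be rejected:

# Illustrative sketch: inspect the scheme (shape and dtype) recorded for field 'x'.
print(g.node_attr_schemes())        # e.g. {'x': Scheme(shape=(3,), dtype=torch.float32)}
# g.ndata['x'][0] = th.zeros(1, 4)  # per-node shape (4,) != scheme shape (3,) -> error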
###############################################################################
# Next steps
# ----------
......
@@ -72,6 +72,7 @@ plt.show()
# list of graph and label pairs.

import dgl
+import torch

def collate(samples):
    # The input `samples` is a list of pairs
@@ -99,57 +100,9 @@ def collate(samples):
#    be called readout or aggregation. Finally, the graph
# representations are fed into a classifier :math:`g` to predict the graph labels.
#
-# Graph convolution
-# -----------------
-# The graph convolution operation is basically the same as that for graph convolutional network (GCN). To learn more,
-# see the GCN `tutorial <https://docs.dgl.ai/tutorials/models/1_gnn/1_gcn.html>`_. The only difference is
-# that we replace :math:`h_{v}^{(l+1)} = \text{ReLU}\left(b^{(l)}+\sum_{u\in\mathcal{N}(v)}h_{u}^{(l)}W^{(l)}\right)`
-# by
-# :math:`h_{v}^{(l+1)} = \text{ReLU}\left(b^{(l)}+\frac{1}{|\mathcal{N}(v)|}\sum_{u\in\mathcal{N}(v)}h_{u}^{(l)}W^{(l)}\right)`.
-#
-# The replacement of summation by average is to balance nodes with different
-# degrees. This gives a better performance for this experiment.
-#
-# The self edges added in the dataset initialization allows you to
-# include the original node feature :math:`h_{v}^{(l)}` when taking the average.
-
-import dgl.function as fn
-import torch
-import torch.nn as nn
-
-# Sends a message of node feature h.
-msg = fn.copy_src(src='h', out='m')
-
-def reduce(nodes):
-    """Take an average over all neighbor node features hu and use it to
-    overwrite the original node feature."""
-    accum = torch.mean(nodes.mailbox['m'], 1)
-    return {'h': accum}
-
-class NodeApplyModule(nn.Module):
-    """Update the node feature hv with ReLU(Whv+b)."""
-    def __init__(self, in_feats, out_feats, activation):
-        super(NodeApplyModule, self).__init__()
-        self.linear = nn.Linear(in_feats, out_feats)
-        self.activation = activation
-
-    def forward(self, node):
-        h = self.linear(node.data['h'])
-        h = self.activation(h)
-        return {'h' : h}
-
-class GCN(nn.Module):
-    def __init__(self, in_feats, out_feats, activation):
-        super(GCN, self).__init__()
-        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)
-
-    def forward(self, g, feature):
-        # Initialize the node features with h.
-        g.ndata['h'] = feature
-        g.update_all(msg, reduce)
-        g.apply_nodes(func=self.apply_mod)
-        return g.ndata.pop('h')
+# Graph convolution layer can be found in the ``dgl.nn.<backend>`` submodule.
+
+from dgl.nn.pytorch import GraphConv

###############################################################################
# Readout and classification
@@ -166,25 +119,25 @@ class GCN(nn.Module):
# graphs with variable size. You then feed the graph representations into a
# classifier with one linear layer to obtain pre-softmax logits.

+import torch.nn as nn
import torch.nn.functional as F

class Classifier(nn.Module):
    def __init__(self, in_dim, hidden_dim, n_classes):
        super(Classifier, self).__init__()
-        self.layers = nn.ModuleList([
-            GCN(in_dim, hidden_dim, F.relu),
-            GCN(hidden_dim, hidden_dim, F.relu)])
+        self.conv1 = GraphConv(in_dim, hidden_dim)
+        self.conv2 = GraphConv(hidden_dim, hidden_dim)
        self.classify = nn.Linear(hidden_dim, n_classes)

    def forward(self, g):
-        # For undirected graphs, in_degree is the same as
-        # out_degree.
+        # Use node degree as the initial node feature. For undirected graphs, the in-degree
+        # is the same as the out_degree.
        h = g.in_degrees().view(-1, 1).float()
-        for conv in self.layers:
-            h = conv(g, h)
+        # Perform graph convolution and activation function.
+        h = F.relu(self.conv1(g, h))
+        h = F.relu(self.conv2(g, h))
        g.ndata['h'] = h
+        # Calculate graph representation by averaging all the node representations.
        hg = dgl.mean_nodes(g, 'h')
        return self.classify(hg)
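For reference, a small usage sketch (not part of the patch; the toy graphs and sizes here are made up) shows how a batch of graphs flows through the classifier:

# Illustrative sketch only: classify a tiny batch of toy graphs.
g1 = dgl.DGLGraph([(0, 1), (1, 2), (2, 0)])          # a directed triangle
g2 = dgl.DGLGraph([(0, 1), (1, 2), (2, 3), (3, 0)])  # a directed 4-cycle
bg = dgl.batch([g1, g2])                             # merge into one batched graph
model = Classifier(in_dim=1, hidden_dim=16, n_classes=2)
logits = model(bg)
print(logits.shape)  # torch.Size([2, 2]): one row of class scores per graph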
......
@@ -9,9 +9,14 @@ Yu Gai, Quan Gan, Zheng Zhang
This is a gentle introduction of using DGL to implement Graph Convolutional
Networks (Kipf & Welling et al., `Semi-Supervised Classification with Graph
-Convolutional Networks <https://arxiv.org/pdf/1609.02907.pdf>`_). We build upon
-the :doc:`earlier tutorial <../../basics/3_pagerank>` on DGLGraph and demonstrate
-how DGL combines graph with deep neural network and learn structural representations.
+Convolutional Networks <https://arxiv.org/pdf/1609.02907.pdf>`_). We explain
+what is under the hood of the :class:`~dgl.nn.pytorch.GraphConv` module.
+The reader is expected to learn how to define a new GNN layer using DGL's
+message passing APIs.
+
+We build upon the :doc:`earlier tutorial <../../basics/3_pagerank>` on DGLGraph
+and demonstrate how DGL combines graphs with deep neural networks and learns
+structural representations.
"""

###############################################################################
@@ -28,8 +33,8 @@ how DGL combines graph with deep neural network and learn structural representations.
# representation :math:`\hat{h}_{u}` with a linear projection followed by a
# non-linearity: :math:`h_{u} = f(W_{u} \hat{h}_u)`.
#
-# We will implement step 1 with DGL message passing, and step 2 with the
-# ``apply_nodes`` method, whose node UDF will be a PyTorch ``nn.Module``.
+# We will implement step 1 with DGL message passing, and step 2 by
+# PyTorch ``nn.Module``.
#
# GCN implementation with DGL
# ``````````````````````````````````````````
@@ -48,35 +53,23 @@ gcn_msg = fn.copy_src(src='h', out='m')
gcn_reduce = fn.sum(msg='m', out='h')

###############################################################################
-# We then define the node UDF for ``apply_nodes``, which is a fully-connected layer:
-
-class NodeApplyModule(nn.Module):
-    def __init__(self, in_feats, out_feats, activation):
-        super(NodeApplyModule, self).__init__()
-        self.linear = nn.Linear(in_feats, out_feats)
-        self.activation = activation
-
-    def forward(self, node):
-        h = self.linear(node.data['h'])
-        if self.activation is not None:
-            h = self.activation(h)
-        return {'h' : h}
-
-###############################################################################
-# We then proceed to define the GCN module. A GCN layer essentially performs
-# message passing on all the nodes then applies the `NodeApplyModule`. Note
-# that we omitted the dropout in the paper for simplicity.
-
-class GCN(nn.Module):
-    def __init__(self, in_feats, out_feats, activation):
-        super(GCN, self).__init__()
-        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)
-
-    def forward(self, g, feature):
-        g.ndata['h'] = feature
-        g.update_all(gcn_msg, gcn_reduce)
-        g.apply_nodes(func=self.apply_mod)
-        return g.ndata.pop('h')
+# We then proceed to define the GCNLayer module. A GCNLayer essentially performs
+# message passing on all the nodes then applies a fully-connected layer.
+
+class GCNLayer(nn.Module):
+    def __init__(self, in_feats, out_feats):
+        super(GCNLayer, self).__init__()
+        self.linear = nn.Linear(in_feats, out_feats)
+
+    def forward(self, g, feature):
+        # Creating a local scope so that all the stored ndata and edata
+        # (such as the `'h'` ndata below) are automatically popped out
+        # when the scope exits.
+        with g.local_scope():
+            g.ndata['h'] = feature
+            g.update_all(gcn_msg, gcn_reduce)
+            h = g.ndata['h']
+            return self.linear(h)

###############################################################################
# The forward function is essentially the same as any other commonly seen NNs
@@ -84,17 +77,17 @@ class GCN(nn.Module):
# let's define a simple neural network consisting of two GCN layers. Suppose we
# are training the classifier for the cora dataset (the input feature size is
# 1433 and the number of classes is 7). The last GCN layer computes node embeddings,
-# so the last layer in general doesn't apply activation.
+# so the last layer in general does not apply activation.

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
-        self.gcn1 = GCN(1433, 16, F.relu)
-        self.gcn2 = GCN(16, 7, None)
+        self.layer1 = GCNLayer(1433, 16)
+        self.layer2 = GCNLayer(16, 7)

    def forward(self, g, features):
-        x = self.gcn1(g, features)
-        x = self.gcn2(g, x)
+        x = F.relu(self.layer1(g, features))
+        x = self.layer2(g, x)
        return x

net = Net()
print(net)
@@ -110,11 +103,7 @@ def load_cora_data():
    labels = th.LongTensor(data.labels)
    train_mask = th.BoolTensor(data.train_mask)
    test_mask = th.BoolTensor(data.test_mask)
-    g = data.graph
-    # add self loop
-    g.remove_edges_from(nx.selfloop_edges(g))
-    g = DGLGraph(g)
-    g.add_edges(g.nodes(), g.nodes())
+    g = DGLGraph(data.graph)
    return g, features, labels, train_mask, test_mask

###############################################################################
@@ -137,7 +126,7 @@ def evaluate(model, g, features, labels, mask):
import time
import numpy as np

g, features, labels, train_mask, test_mask = load_cora_data()
-optimizer = th.optim.Adam(net.parameters(), lr=1e-3)
+optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
for epoch in range(50):
    if epoch >= 3:
......
@@ -298,9 +298,7 @@ lr = 0.01 # learning rate
l2norm = 0 # L2 norm coefficient

# create graph
-g = DGLGraph()
-g.add_nodes(num_nodes)
-g.add_edges(data.edge_src, data.edge_dst)
+g = DGLGraph((data.edge_src, data.edge_dst))
g.edata.update({'rel_type': edge_type, 'norm': edge_norm})

# create model
......
@@ -94,6 +94,15 @@ structure-free normalization, in the style of attention.
# GAT in DGL
# ----------
#
+# DGL provides an off-the-shelf implementation of the GAT layer under the ``dgl.nn.<backend>``
+# subpackage. Simply import the ``GATConv`` as follows.
+
+from dgl.nn.pytorch import GATConv
+
+###############################################################
+# Readers can skip the following step-by-step explanation of the implementation and
+# jump to the `Put everything together`_ for training and visualization results.
+#
# To begin, you can get an overall impression about how a ``GATLayer`` module is
# implemented in DGL. In this section, the four equations above are broken down
# one at a time.
@@ -277,11 +286,7 @@ def load_cora_data():
    features = torch.FloatTensor(data.features)
    labels = torch.LongTensor(data.labels)
    mask = torch.BoolTensor(data.train_mask)
-    g = data.graph
-    # add self loop
-    g.remove_edges_from(nx.selfloop_edges(g))
-    g = DGLGraph(g)
-    g.add_edges(g.nodes(), g.nodes())
+    g = DGLGraph(data.graph)
    return g, features, labels, mask

##############################################################################
......
@@ -68,18 +68,11 @@ import dgl

def init_graph(in_nodes, out_nodes, f_size):
-    g = dgl.DGLGraph()
-    all_nodes = in_nodes + out_nodes
-    g.add_nodes(all_nodes)
-
-    in_indx = list(range(in_nodes))
-    out_indx = list(range(in_nodes, in_nodes + out_nodes))
-    # add edges use edge broadcasting
-    for u in in_indx:
-        g.add_edges(u, out_indx)
+    u = np.repeat(np.arange(in_nodes), out_nodes)
+    v = np.tile(np.arange(in_nodes, in_nodes + out_nodes), in_nodes)
+    g = dgl.DGLGraph((u, v))

    # init states
-    g.ndata['v'] = th.zeros(all_nodes, f_size)
+    g.ndata['v'] = th.zeros(in_nodes + out_nodes, f_size)
    g.edata['b'] = th.zeros(in_nodes * out_nodes, 1)
    return g
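As a quick illustration (not from the patch, and assuming the tutorial's numpy/torch imports), the routing graph is a fully connected bipartite graph, so the edge count is the product of the two capsule counts:

# Illustrative sketch: 10 input capsules fully connected to 5 output capsules.
g = init_graph(in_nodes=10, out_nodes=5, f_size=8)
print(g.number_of_nodes(), g.number_of_edges())  # 15 50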
@@ -113,6 +106,8 @@ def init_graph(in_nodes, out_nodes, f_size):
# - The scalar product :math:`\hat{u}_{j|i}\cdot v_j` can be considered as how well capsule :math:`i` agrees with :math:`j`. It is used to update
#   :math:`b_{ij}=b_{ij}+\hat{u}_{j|i}\cdot v_j`
+
+import dgl.function as fn

class DGLRoutingLayer(nn.Module):
    def __init__(self, in_nodes, out_nodes, f_size):
        super(DGLRoutingLayer, self).__init__()
@@ -125,24 +120,14 @@ class DGLRoutingLayer(nn.Module):
    def forward(self, u_hat, routing_num=1):
        self.g.edata['u_hat'] = u_hat

-        # step 2 (line 5)
-        def cap_message(edges):
-            return {'m': edges.data['c'] * edges.data['u_hat']}
-        self.g.register_message_func(cap_message)
-
-        def cap_reduce(nodes):
-            return {'s': th.sum(nodes.mailbox['m'], dim=1)}
-        self.g.register_reduce_func(cap_reduce)
-
        for r in range(routing_num):
            # step 1 (line 4): normalize over out edges
            edges_b = self.g.edata['b'].view(self.in_nodes, self.out_nodes)
            self.g.edata['c'] = F.softmax(edges_b, dim=1).view(-1, 1)
+            self.g.edata['c u_hat'] = self.g.edata['c'] * self.g.edata['u_hat']
            # Execute step 1 & 2
-            self.g.update_all()
+            self.g.update_all(fn.copy_e('c u_hat', 'm'), fn.sum('m', 's'))
            # step 3 (line 6)
            self.g.nodes[self.out_indx].data['v'] = self.squash(self.g.nodes[self.out_indx].data['s'], dim=1)
......