"""
.. _model-gcn:

Graph Convolutional Network
====================================
**Author:** Qi Huang, `Minjie Wang <https://jermainewang.github.io/>`_,
Yu Gai, Quan Gan, Zheng Zhang

This is a gentle introduction to using DGL to implement Graph Convolutional
Networks (Kipf & Welling, `Semi-Supervised Classification with Graph
Convolutional Networks <https://arxiv.org/pdf/1609.02907.pdf>`_). We build upon
the :doc:`earlier tutorial <../3_pagerank>` on DGLGraph and demonstrate how DGL
combines graphs with deep neural networks to learn structural representations.
"""
###############################################################################
# Model Overview
# ------------------------------------------
# GCN from the perspective of message passing
# ```````````````````````````````````````````````
# We describe a layer of a graph convolutional network from a message
# passing perspective; the math can be found `here <math_>`_.
# It boils down to the following two steps, for each node :math:`u`:
#
# 1) Aggregate the neighbors' representations :math:`h_{v}` to produce an
#    intermediate representation :math:`\hat{h}_u`.
#
# 2) Transform the aggregated representation :math:`\hat{h}_{u}` with a linear
#    projection followed by a non-linearity: :math:`h_{u} = f(W \hat{h}_u)`.
#
# We will implement step 1 with DGL message passing, and step 2 with the
# ``apply_nodes`` method, whose node UDF will be a PyTorch ``nn.Module``.
#
# GCN implementation with DGL
# ``````````````````````````````````````````
# We first define the message and reduce function as usual. Since the
# aggregation on a node :math:`u` only involves summing over the neighbors'
# representations :math:`h_v`, we can simply use builtin functions:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph
gcn_msg = fn.copy_src(src='h', out='m')
gcn_reduce = fn.sum(msg='m', out='h')
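###############################################################################
# For intuition, the builtins above correspond roughly to the following
# explicit UDFs (a sketch using the same ``(src, edge)`` / ``(node, msg)``
# signatures as the rest of this tutorial; the exact batching dimensions
# depend on the DGL version). The builtins are preferred because DGL can map
# them onto fused sparse kernels.
def gcn_msg_udf(src, edge):
    # Copy the source node feature 'h' into the message 'm'.
    return {'m': src['h']}

def gcn_reduce_udf(node, msg):
    # Sum the incoming messages over the neighbor dimension.
    return {'h': msg['m'].sum(dim=1)}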
###############################################################################
# We then define the node UDF for ``apply_nodes``, which is a fully-connected layer:
class NodeApplyModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h': h}
###############################################################################
# We then proceed to define the GCN module. A GCN layer essentially performs
# message passing on all the nodes and then applies the ``NodeApplyModule``.
# Note that we omit the dropout used in the paper for simplicity.
class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(gcn_msg, gcn_reduce)
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')
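###############################################################################
# As a quick sanity check, here is a minimal sketch that runs a single GCN
# layer on a hypothetical toy graph (the sizes below are made up, not Cora's):
toy_g = DGLGraph()
toy_g.add_nodes(3)
toy_g.add_edges([0, 1, 2], [1, 2, 0])  # a directed 3-cycle
toy_layer = GCN(5, 2, F.relu)
# Feed random 5-dimensional features; the output should have shape (3, 2).
print(toy_layer(toy_g, th.randn(3, 5)).shape)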
###############################################################################
# The forward function is essentially the same as that of any other commonly
# seen NN model in PyTorch. We can initialize GCN like any ``nn.Module``. For
# example, let's define a simple neural network consisting of two GCN layers.
# Suppose we are training a classifier for the Cora dataset (the input feature
# size is 1433 and the number of classes is 7).
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.gcn1 = GCN(1433, 16, F.relu)
        self.gcn2 = GCN(16, 7, F.relu)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        x = self.gcn2(g, x)
        return x

net = Net()
print(net)
###############################################################################
# We load the Cora dataset using DGL's built-in data module.
from dgl.data import citation_graph as citegrh

def load_cora_data():
    data = citegrh.load_cora()
    features = th.FloatTensor(data.features)
    labels = th.LongTensor(data.labels)
    mask = th.ByteTensor(data.train_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, mask
###############################################################################
# We then train the network as follows:
import time
import numpy as np

g, features, labels, mask = load_cora_data()
optimizer = th.optim.Adam(net.parameters(), lr=1e-3)
dur = []
for epoch in range(30):
    if epoch >= 3:
        t0 = time.time()

    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[mask], labels[mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    # Report 0 for the duration until we have timed at least one epoch.
    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format(
        epoch, loss.item(), np.mean(dur) if dur else 0))
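###############################################################################
# For completeness, here is a minimal sketch of how one might measure accuracy
# on held-out nodes. It assumes a node mask like the one above, for example a
# hypothetical ``test_mask`` (not loaded above) selecting the evaluation set.
def evaluate(net, g, features, labels, mask):
    net.eval()
    with th.no_grad():
        logits = net(g, features)
        # Pick the class with the highest score for each masked node.
        preds = logits[mask].argmax(dim=1)
        correct = (preds == labels[mask]).sum().item()
        return correct / mask.sum().item()

print("Train accuracy {:.4f}".format(evaluate(net, g, features, labels, mask)))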
###############################################################################
# .. _math:
#
# GCN in one formula
# ------------------
# Mathematically, the GCN model follows this formula:
#
# :math:`H^{(l+1)} = \sigma(\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}H^{(l)}W^{(l)})`
#
# Here, :math:`H^{(l)}` denotes the node representations at the :math:`l^{th}`
# layer of the network, :math:`\sigma` is the non-linearity, and
# :math:`W^{(l)}` is the weight matrix of this layer. :math:`D` and :math:`A`,
# as commonly seen, represent the degree matrix and adjacency matrix,
# respectively. The tilde denotes a renormalization trick in which we add a
# self-connection to each node of the graph and build the corresponding degree
# and adjacency matrices: :math:`\tilde{A} = A + I` and
# :math:`\tilde{D}_{ii} = \sum_j \tilde{A}_{ij}`. The shape of the input
# :math:`H^{(0)}` is :math:`N \times D`, where :math:`N` is the number of nodes
# and :math:`D` is the number of input features. We can chain up multiple such
# layers to produce a node-level representation output with shape
# :math:`N \times F`, where :math:`F` is the dimension of the output node
# feature vector.
#
# The equation can be efficiently implemented using sparse matrix
# multiplication kernels (as in Kipf's
# `pygcn <https://github.com/tkipf/pygcn>`_ code). The above DGL implementation
# in fact already uses this trick, thanks to the builtin functions. To
# understand what is under the hood, please read our tutorial on
# :doc:`PageRank <3_pagerank>`.
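###############################################################################
# As a sanity check on the formula itself, here is a toy dense sketch of one
# propagation step (purely illustrative; DGL does not materialize these
# matrices):
A = th.tensor([[0., 1., 0.],
               [1., 0., 1.],
               [0., 1., 0.]])                 # adjacency of a 3-node path
A_tilde = A + th.eye(3)                       # add self-connections
D_inv_sqrt = th.diag(A_tilde.sum(dim=1).pow(-0.5))
H0 = th.randn(3, 4)                           # N x D input features
W0 = th.randn(4, 2)                           # D x F layer weight
H1 = F.relu(D_inv_sqrt @ A_tilde @ D_inv_sqrt @ H0 @ W0)
print(H1.shape)                               # torch.Size([3, 2])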
"""
.. _model-capsule:

Capsule Network
================
**Author**: `Jinjing Zhou`

This tutorial explains how to use the DGL library and its message passing
interface to implement the `capsule network
<http://arxiv.org/abs/1710.09829>`__ proposed by Geoffrey Hinton and his team.
The algorithm aims to provide a better alternative to current neural network
structures. With the DGL library, users can implement the algorithm in a more
intuitive way.
"""
##############################################################################
# Model Overview
# ---------------
# Introduction
# ```````````````````
# Capsule networks were first introduced in 2011 by Geoffrey Hinton et al. in
# the paper `Transforming Autoencoders
# <https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf>`__, but it was not
# until November 2017 that Sara Sabour, Nicholas Frosst, and Geoffrey Hinton
# published the paper `Dynamic Routing Between Capsules
# <http://arxiv.org/abs/1710.09829>`__, which introduced a CapsNet architecture
# that reached state-of-the-art performance on MNIST.
#
# What's a capsule?
# ```````````````````
# In the paper, the authors state that "A capsule is a group of neurons whose
# activity vector represents the instantiation parameters of a specific type
# of entity such as an object or an object part."
#
# Generally speaking, the idea of a capsule is to encode all the information
# about a feature in vector form, substituting the scalars of a traditional
# neural network with vectors, and to use the norm of each vector to represent
# the meaning that the original scalar carried.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f1.png
#
# Dynamic Routing Algorithm
# `````````````````````````````
# Because of its different structure, a capsule network uses different
# operations to compute its results. This figure, drawn by `Max Pechyonkin
# <https://medium.com/ai%C2%B3-theory-practice-business/understanding-hintons-capsule-networks-part-ii-how-capsules-work-153b6ade9f66O>`__,
# shows the comparison:
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f2.png
# :height: 250px
#
# The key idea is that the output of each capsule is a weighted sum of its
# input vectors, where the weights are refined iteratively by the routing
# procedure. We will go into the details in the later sections with code
# implementations.
#
# Model Implementations
# -------------------------
##############################################################################
# Algorithm Overview
# ```````````````````````````
#
# .. image:: https://raw.githubusercontent.com/VoVAllen/DGL_Capsule/master/algorithm.png
#
# The main steps of the routing algorithm are lines 4 - 7. In the ``DGLGraph``
# structure, we treat these steps as a message passing procedure.
##############################################################################
# Consider capsule routing as a graph structure
# ````````````````````````````````````````````````````````````````````````````
# We can consider each capsule as a node in a graph and connect every node in
# one layer to every node in the next layer.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f3.png
# :height: 150px
#
def construct_graph(self):
    g = dgl.DGLGraph()
    g.add_nodes(self.input_capsule_num + self.output_capsule_num)
    input_nodes = list(range(self.input_capsule_num))
    output_nodes = list(range(self.input_capsule_num,
                              self.input_capsule_num + self.output_capsule_num))
    u, v = [], []
    # Connect every input capsule to every output capsule.
    for i in input_nodes:
        for j in output_nodes:
            u.append(i)
            v.append(j)
    g.add_edges(u, v)
    return g, input_nodes, output_nodes
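##############################################################################
# As a quick sanity check with hypothetical toy sizes (3 input and 2 output
# capsules), the construction above yields a complete bipartite pattern with
# 3 + 2 = 5 nodes and 3 * 2 = 6 edges:
import dgl
toy_g = dgl.DGLGraph()
toy_g.add_nodes(3 + 2)
toy_g.add_edges([0, 0, 1, 1, 2, 2], [3, 4, 3, 4, 3, 4])
assert toy_g.number_of_nodes() == 5 and toy_g.number_of_edges() == 6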
##############################################################################
# Write Message Passing Functions
# ``````````````````````````````````
# Reduce Functions (line 4 - 5)
# .............................................
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f5.png
#
# At this stage, we need to define a reduce function that aggregates the node
# features from layer :math:`l` and computes their weighted sum to form the
# node features of layer :math:`(l+1)`.
#
# .. note::
# The softmax operation is over dimension :math:`j` instead of :math:`i`.
def capsule_reduce(node, msg):
    b_ij_c, u_hat = msg['b_ij'], msg['u_hat']
    # line 4
    c_i = F.softmax(b_ij_c, dim=0)
    # line 5
    s_j = (c_i.unsqueeze(2).unsqueeze(3) * u_hat).sum(dim=1)
    return {'h': s_j}
##############################################################################
# Node Update Functions (line 6)
# ......................................................
# Squash the intermediate representations into node features :math:`v_j`
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step6.png
#
def capsule_update(msg):
    v_j = squash(msg['h'])
    return {'h': v_j}
##############################################################################
# Edge Update Functions (line 7)
# ...........................................................................
# Update the routing coefficients by updating the edge features of the graph.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step7.png
#
def update_edge(u, v, edge):
    # line 7: increase b_ij by the agreement between the output capsule
    # feature v_j and the prediction u_hat
    return {'b_ij': edge['b_ij'] + (v['h'] * edge['u_hat']).mean(dim=1).sum(dim=1)}
##############################################################################
# Call DGL function to execute algorithm
# ````````````````````````````````````````````````````````````````````````````
# Call the ``update_all`` and ``update_edge`` functions to execute the whole
# algorithm. The message function defines which attributes are needed in the
# subsequent computations.
#
def routing(self):
    def capsule_msg(src, edge):
        return {'b_ij': edge['b_ij'], 'h': src['h'], 'u_hat': edge['u_hat']}

    self.g.update_all(capsule_msg, capsule_reduce, capsule_update)
    self.g.update_edge(edge_func=update_edge)
##############################################################################
# Forward Function
# ````````````````````````````````````````````````````````````````````````````
# This section shows the whole forward pass of the capsule routing algorithm.
def forward(self, x):
    self.batch_size = x.size(0)
    u_hat = self.compute_uhat(x)
    self.initialize_nodes_and_edges_features(u_hat)
    for i in range(self.num_routing):
        self.routing()
    # Read out the features of the output-layer capsules.
    this_layer_nodes_feature = self.g.get_n_repr()['h'][
        self.input_capsule_num:self.input_capsule_num + self.output_capsule_num]
    return this_layer_nodes_feature.transpose(0, 1).unsqueeze(1).unsqueeze(4).squeeze(1)
##############################################################################
# Other Components
# ````````````````````````````````````````````````````````````````
# Initialization & Affine Transformation
# ..................................................
# This section implements the affine transformation in capsule networks,
# which transforms capsules into different dimensions.
#
# - Pre-compute :math:`\hat{u}_{j|i}`, initialize :math:`b_{ij}`, and store them as edge attributes
# - Initialize the node features as zero
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f4.png
#
def compute_uhat(self, x):
    # x is the input vector with shape [batch_size, input_capsule_dim, input_num]
    # Transpose x to [batch_size, input_num, input_capsule_dim]
    x = x.transpose(1, 2)
    # Expand x to [batch_size, input_num, output_num, input_capsule_dim, 1]
    x = torch.stack([x] * self.output_capsule_num, dim=2).unsqueeze(4)
    # Expand W from [input_num, output_num, output_capsule_dim, input_capsule_dim]
    # to [batch_size, input_num, output_num, output_capsule_dim, input_capsule_dim]
    W = self.weight.expand(self.batch_size, *self.weight.size())
    # u_hat's shape is [input_num, output_num, batch_size, output_capsule_dim]
    u_hat = torch.matmul(W, x).permute(1, 2, 0, 3, 4).squeeze().contiguous()
    return u_hat
def initialize_nodes_and_edges_features(self, u_hat):
    # Initialize the routing logits b_ij to zero and store them, together
    # with u_hat, as edge features.
    b_ij = torch.zeros(self.input_capsule_num, self.output_capsule_num).to(self.device)
    self.g.set_e_repr({'b_ij': b_ij.view(-1)})
    self.g.set_e_repr({'u_hat': u_hat.view(-1, self.batch_size, self.output_capsule_dim)})
    # Initialize all node features as zero
    node_features = torch.zeros(self.input_capsule_num + self.output_capsule_num,
                                self.batch_size, self.output_capsule_dim).to(self.device)
    self.g.set_n_repr({'h': node_features})
##############################################################################
# Squash function
# ..................
# The squashing function ensures that short vectors are shrunk to almost zero
# length and long vectors to a length slightly below 1, so that the norm of a
# capsule's output can be interpreted as a probability.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/squash.png
# :height: 100px
#
def squash(s, dim=2):
    sq = torch.sum(s ** 2, dim=dim, keepdim=True)
    s_std = torch.sqrt(sq)
    s = (sq / (1.0 + sq)) * (s / s_std)
    return s
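##############################################################################
# A quick hedged sanity check of the squashing behavior (toy tensors with the
# default ``dim=2``): a short vector's norm is pushed near 0 and a long
# vector's norm approaches, but stays below, 1.
import torch
short = torch.full((1, 1, 4), 0.01)
long_vec = torch.full((1, 1, 4), 10.0)
print(squash(short).norm(dim=2))     # roughly 0.0004
print(squash(long_vec).norm(dim=2))  # roughly 0.9975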
##############################################################################
# General Setup
# .................
import dgl
import torch
import torch.nn.functional as F
from torch import nn
class DGLDigitCapsuleLayer(nn.Module):
    def __init__(self,
                 input_capsule_dim=8,
                 input_capsule_num=1152,
                 output_capsule_num=10,
                 output_capsule_dim=16,
                 num_routing=3,
                 device='cpu'):
        super(DGLDigitCapsuleLayer, self).__init__()
        self.device = device
        self.input_capsule_dim = input_capsule_dim
        self.input_capsule_num = input_capsule_num
        self.output_capsule_dim = output_capsule_dim
        self.output_capsule_num = output_capsule_num
        self.num_routing = num_routing
        self.weight = nn.Parameter(
            torch.randn(input_capsule_num, output_capsule_num, output_capsule_dim, input_capsule_dim))
        self.g, self.input_nodes, self.output_nodes = self.construct_graph()
# Attach the methods defined above, so that the class can be built up across multiple cells.
DGLDigitCapsuleLayer.construct_graph = construct_graph
DGLDigitCapsuleLayer.forward = forward
DGLDigitCapsuleLayer.routing = routing
DGLDigitCapsuleLayer.compute_uhat = compute_uhat
DGLDigitCapsuleLayer.initialize_nodes_and_edges_features = initialize_nodes_and_edges_features
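##############################################################################
# Finally, a hedged end-to-end usage sketch with toy sizes (not the MNIST
# defaults above), relying on the early DGL API used throughout this tutorial:
capsule_layer = DGLDigitCapsuleLayer(input_capsule_dim=8, input_capsule_num=6,
                                     output_capsule_num=2, output_capsule_dim=16)
# Input shape: [batch_size, input_capsule_dim, input_capsule_num]
x = torch.randn(4, 8, 6)
out = capsule_layer(x)
print(out.shape)  # expected roughly torch.Size([4, 2, 16, 1])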