Unverified Commit 9fd94b53 authored by Minjie Wang, committed by GitHub

fix capsule doc (#104)

parent 53e7f731
"""
Your first example in DGL
=========================
TODO: either a pagerank or SSSP example
"""
###############################################################################
# Create a DGLGraph
# -----------------
#
# To start, let's first import dgl
import dgl
"""
Use DGLGraph
============
In this tutorial, we introduce how to use our graph class -- ``DGLGraph``.
``DGLGraph`` is the core data structure in our library. It provides the basic
interfaces to manipulate the graph structure, set/get node/edge features, and convert
from/to many other graph formats. You can also perform computation on the graph
using our message passing APIs. (TODO: give a link here to the message passing doc)
"""
###############################################################################
# Construct a graph
# -----------------
#
# In ``DGLGraph``, all nodes are represented using consecutive integers starting from
# zero. All edges are directed. Let us start by creating a star network of 10 nodes
# where all the edges point to the center node (node#0).
# TODO(minjie): it's better to plot the graph here.
import dgl
star = dgl.DGLGraph()
star.add_nodes(10) # add 10 nodes
for i in range(1, 10):
    star.add_edge(i, 0)
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
###############################################################################
# ``DGLGraph`` also supports adding multiple edges at once by providing multiple
# source and destination nodes. Multiple nodes are represented using either a
# list or a 1D integer tensor (vector). In addition, we also support
# "edge broadcasting":
#
# .. note::
#
#    Given source and destination node lists/tensors ``u`` and ``v``:
#
#    - If ``len(u) == len(v)``, this is a many-many edge set and each edge
#      is represented by ``(u[i], v[i])``.
#    - If ``len(u) == 1``, this is a one-many edge set.
#    - If ``len(v) == 1``, this is a many-one edge set.
#
# Edge broadcasting is supported in many APIs whenever a bunch of edges need
# to be specified. The example below creates the same star graph as the previous one.
star.clear() # clear the previous graph
star.add_nodes(10)
u = list(range(1, 10)) # can also use tensor type here (e.g. torch.Tensor)
star.add_edges(u, 0) # many-one edge set
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
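###############################################################################
# For completeness, here is a small sketch of the other two broadcasting forms on a
# separate toy graph (the node/edge choices below are made up just for illustration).
demo = dgl.DGLGraph()
demo.add_nodes(4)
demo.add_edges([0, 1], [2, 3])  # many-many: adds edges (0, 2) and (1, 3)
demo.add_edges(3, [0, 1])       # one-many: adds edges (3, 0) and (3, 1)
print('#Edges:', demo.number_of_edges())  # 2 + 2 = 4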
###############################################################################
# In ``DGLGraph``, each edge is assigned an internal edge id (also a consecutive
# integer starting from zero). The ids follow the addition order of the edges
# and you can query the id using the ``edge_ids`` interface.
print(star.edge_ids(1, 0)) # the first edge
print(star.edge_ids([8, 9], 0)) # ask for ids of multiple edges
###############################################################################
# Assigning consecutive integer ids for nodes and edges makes it easier to batch
# their features together (see next section). As a result, removing nodes or edges
# of a ``DGLGraph`` is currently not supported because this will break the assumption
# that the ids form a consecutive range from zero.
###############################################################################
# Node and edge features
# ----------------------
# Nodes and edges can have feature data of tensor type. They can be accessed and updated
# through a key-value storage interface. The key must be hashable. The value should
# be the features of each node or edge, batched on the *first* dimension. For example,
# the following code creates features for all nodes (``hv``) and features for all
# edges (``he``). Each feature is a vector of length 3.
#
# .. note::
#
#    The first dimension is usually reserved as the batch dimension in DGL. Thus, even when
#    setting features for only one node/edge, the data still needs an extra dimension (of length one).
import torch as th
D = 3 # the feature dimension
N = star.number_of_nodes()
M = star.number_of_edges()
nfeat = th.randn((N, D)) # some random node features
efeat = th.randn((M, D)) # some random edge features
# TODO(minjie): enable following syntax
# star.nodes[:]['hv'] = nfeat
# star.edges[:]['he'] = efeat
star.set_n_repr({'hv' : nfeat})
star.set_e_repr({'he' : efeat})
###############################################################################
# We can then set some nodes' features to be zero.
# TODO(minjie): enable following syntax
# print(star.nodes[:]['hv'])
print(star.get_n_repr()['hv'])
# set node 0, 2, 4 feature to zero
star.set_n_repr({'hv' : th.zeros((3, D))}, [0, 2, 4])
print(star.get_n_repr()['hv'])
###############################################################################
# Once created, each node/edge feature is associated with a *scheme* containing
# the shape and dtype information of the feature tensor. Updating features using data
# of a different scheme will raise an error unless all the features are updated at once,
# in which case the scheme will be replaced with the new one.
print(star.node_attr_schemes())
# updating features with a different scheme will raise an error
# star.set_n_repr({'hv' : th.zeros((3, 2*D))}, [0, 2, 4])
# updating all the nodes is fine; the old scheme will be replaced
star.set_n_repr({'hv' : th.zeros((N, 2*D))})
print(star.node_attr_schemes())
###############################################################################
# If a new feature is added for some but not all of the nodes/edges, we will
# automatically create empty features for the others to make sure that features are
# always aligned. By default, the empty features are filled with zeros. This behavior
# can be changed using ``set_n_initializer`` and ``set_e_initializer``.
star.set_n_repr({'hv_1' : th.randn((3, D+1))}, [0, 2, 4])
print(star.node_attr_schemes())
print(star.get_n_repr()['hv_1'])
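###############################################################################
# A minimal sketch of swapping the default initializer. The exact callable signature
# expected by ``set_n_initializer`` is an assumption here (check the API reference);
# the point is only to show where the hook goes, so the calls are left commented out.
def ones_initializer(shape, dtype):
    # assumed signature: receives the shape and dtype of the feature tensor to create
    return th.ones(shape, dtype=dtype)
# star.set_n_initializer(ones_initializer)
# star.set_n_repr({'hv_2' : th.randn((3, D))}, [0, 2, 4])
# print(star.get_n_repr()['hv_2'])  # untouched nodes would now be filled with ones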
###############################################################################
# Convert from/to other formats
# -----------------------------
# A ``DGLGraph`` can be easily converted from/to a ``networkx`` graph.
import networkx as nx
# note that networkx creates undirected graphs by default, so when converting
# to DGLGraph, directed edges in both directions will be added.
nx_star = nx.star_graph(9)
star = dgl.DGLGraph(nx_star)
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
###############################################################################
# Node and edge attributes can be automatically batched when converting from a
# ``networkx`` graph. Since a ``networkx`` graph by default does not record which
# edge was added first, we use the ``"id"`` edge attribute as a hint
# if available.
for i in range(10):
    nx_star.nodes[i]['feat'] = th.randn((D,))
star = dgl.DGLGraph()
star.from_networkx(nx_star, node_attrs=['feat']) # auto-batch specified node features
print(star.get_n_repr()['feat'])
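###############################################################################
# Edge attributes can be batched the same way. The sketch below attaches an explicit
# ``"id"`` attribute so the edge order is unambiguous, plus a feature ``w``; the
# ``edge_attrs`` argument is assumed to mirror ``node_attrs``.
for k, (src, dst) in enumerate(nx_star.edges):
    nx_star.edges[src, dst]['id'] = k
    nx_star.edges[src, dst]['w'] = th.randn((D,))
star2 = dgl.DGLGraph()
star2.from_networkx(nx_star, node_attrs=['feat'], edge_attrs=['w'])
print(star2.get_e_repr()['w'].shape)  # one row per directed edge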
###############################################################################
# Multi-edge graph
# ----------------
# Many applications work on graphs containing multi-edges. To enable
# them, construct a ``DGLGraph`` with ``multigraph=True``.
g = dgl.DGLGraph(multigraph=True)
g.add_nodes(5)
g.add_edge(0, 1)
g.add_edge(1, 2)
g.add_edge(0, 1)
print('#Nodes:', g.number_of_nodes())
print('#Edges:', g.number_of_edges())
# init random edge features
M = g.number_of_edges()
g.set_e_repr({'he' : th.randn((M, D))})
###############################################################################
# Because an edge in a multigraph cannot be uniquely identified by its incident
# nodes ``u`` and ``v``, you need to use edge ids to access edge features. The
# edge ids can be queried via the ``edge_id`` interface.
eid_01 = g.edge_id(0, 1)
print(eid_01)
###############################################################################
# We can then use the edge id to set/get the features of the corresponding edge.
g.set_e_repr_by_id({'he' : th.ones(len(eid_01), D)}, eid=eid_01)
print(g.get_e_repr()['he'])
Model Tutorials
===============
Graph-based DNN models in DGL.
""" """
Capsule Network Capsule Network
================ ================
**Author**: `Jinjing Zhou` **Author**: `Jinjing Zhou`
This tutorial explains how to use DGL library and its language to implement the `capsule network <http://arxiv.org/abs/1710.09829>`__ proposed by Geoffrey Hinton and his team. The algorithm aims to provide a better alternative to current neural network structures. By using DGL library, users can implement the algorithm in a more intuitive way. This tutorial explains how to use DGL library and its language to implement the
`capsule network <http://arxiv.org/abs/1710.09829>`__ proposed by Geoffrey Hinton and his team.
The algorithm aims to provide a better alternative to current neural network structures.
By using DGL library, users can implement the algorithm in a more intuitive way.
""" """
############################################################################## ##############################################################################
# Model Overview # Model Overview
# --------------- # ---------------
# Introduction # Introduction
# ``````````````````` # ```````````````````
# Capsule Network were first introduced in 2011 by Geoffrey Hinton, et al., in paper `Transforming Autoencoders <https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf>`__, but it was only a few months ago, in November 2017, that Sara Sabour, Nicholas Frosst, and Geoffrey Hinton published a paper called Dynamic Routing between Capsules, where they introduced a CapsNet architecture that reached state-of-the-art performance on MNIST. # Capsule Network were first introduced in 2011 by Geoffrey Hinton, et al.,
# in paper `Transforming Autoencoders <https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf>`__,
# but it was only a few months ago, in November 2017, that Sara Sabour, Nicholas Frosst,
# and Geoffrey Hinton published a paper called Dynamic Routing between Capsules, where they
# introduced a CapsNet architecture that reached state-of-the-art performance on MNIST.
# #
# What's a capsule? # What's a capsule?
# ``````````````````` # ```````````````````
# In papers, author states that "A capsule is a group of neurons whose activity vector represents the instantiation parameters of a specific type of entity such as an object or an object part." # In papers, author states that "A capsule is a group of neurons whose activity vector
# Generally Speaking, the idea of capsule is to encode all the information about the features into a vector form, by substituting scalars in traditional neural network with vectors. And use the norm of the vector to represents the meaning of original scalars. # represents the instantiation parameters of a specific type of entity such as an object
# or an object part."
# Generally Speaking, the idea of capsule is to encode all the information about the
# features into a vector form, by substituting scalars in traditional neural network with vectors.
# And use the norm of the vector to represents the meaning of original scalars.
# #
# .. image:: /_static/capsule_f1.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f1.png
# #
# Dynamic Routing Algorithm # Dynamic Routing Algorithm
# ````````````````````````````` # `````````````````````````````
# Due to the different structure of network, capsules network has different operations to calculate results. This figure shows the comparison, drawn by `Max Pechyonkin <https://medium.com/ai%C2%B3-theory-practice-business/understanding-hintons-capsule-networks-part-ii-how-capsules-work-153b6ade9f66O>`__ # Due to the different structure of network, capsules network has different operations to
# calculate results. This figure shows the comparison, drawn by
# `Max Pechyonkin <https://medium.com/ai%C2%B3-theory-practice-business/understanding-hintons-capsule-networks-part-ii-how-capsules-work-153b6ade9f66O>`__
# #
# .. image:: /_static/capsule_f2.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f2.png
# :height: 250px # :height: 250px
# #
# The key idea is that the output of each capsule is the sum of weighted input vectors. We will go into details in the later section with code implementations. # The key idea is that the output of each capsule is the sum of weighted input vectors.
# We will go into details in the later section with code implementations.
# #
# Model Implementations # Model Implementations
# ------------------------- # -------------------------
...@@ -61,10 +73,10 @@ class DGLBatchCapsuleLayer(nn.Module):
# ````````````````````````````````````````````````````````````````````````````
# We can consider each capsule as a node in a graph, and connect all the nodes between layers.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f3.png
#    :height: 200px
#
def construct_graph(self):
    g = dgl.DGLGraph()
    g.add_nodes(self.input_capsule_num + self.output_capsule_num)
    input_nodes = list(range(self.input_capsule_num))
...@@ -76,6 +88,7 @@ class DGLBatchCapsuleLayer(nn.Module):
            v.append(j)
    g.add_edges(u, v)
    return g, input_nodes, output_nodes
DGLBatchCapsuleLayer.construct_graph = construct_graph  # This line is for defining the class in multiple cells.
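##############################################################################
# To make the connectivity concrete, here is a tiny standalone sketch (not part of
# the original model) that builds the same fully-connected bipartite pattern for
# 3 input capsules and 2 output capsules.
in_caps, out_caps = 3, 2
toy = dgl.DGLGraph()
toy.add_nodes(in_caps + out_caps)
u, v = [], []
for i in range(in_caps):
    for j in range(in_caps, in_caps + out_caps):
        u.append(i)
        v.append(j)
toy.add_edges(u, v)
print('#Edges in the toy capsule graph:', toy.number_of_edges())  # 3 * 2 = 6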
##############################################################################
# Initialization & Affine Transformation
...@@ -83,9 +96,9 @@ class DGLBatchCapsuleLayer(nn.Module):
# - Pre-compute :math:`\hat{u}_{j|i}`, initialize :math:`b_{ij}` and store them as edge attributes
# - Initialize node features as zeros
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f4.png
#
def forward(self, x):
    self.batch_size = x.size(0)
    # x is the input vector with shape [batch_size, input_capsule_dim, input_num]
    # Transpose x to [batch_size, input_num, input_capsule_dim]
...@@ -109,19 +122,20 @@ class DGLBatchCapsuleLayer(nn.Module):
    node_features = torch.zeros(self.input_capsule_num + self.output_capsule_num, self.batch_size,
                                self.output_capsule_dim).to(self.device)
    self.g.set_n_repr({'h': node_features})
DGLBatchCapsuleLayer.forward = forward
##############################################################################
# Write Message Passing functions and Squash function
# ````````````````````````````````````````````````````````````````````````````
# Squash function
# ..................
# The squashing function ensures that short vectors get shrunk to almost zero length and
# long vectors get shrunk to a length slightly below 1.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/squash.png
#    :height: 100px
#
def squash(s):
    mag_sq = torch.sum(s ** 2, dim=2, keepdim=True)
    mag = torch.sqrt(mag_sq)
    s = (mag_sq / (1.0 + mag_sq)) * (s / mag)
...@@ -131,25 +145,25 @@ class DGLBatchCapsuleLayer(nn.Module):
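##############################################################################
# A quick numerical check of the squashing behaviour (a self-contained sketch,
# assuming ``torch`` is imported as in the rest of this file): short vectors are
# driven toward zero length, long vectors toward a length just below 1.
_s = torch.tensor([[[0.1, 0.0, 0.0]], [[10.0, 0.0, 0.0]]])   # shape (2, 1, 3)
_mag_sq = torch.sum(_s ** 2, dim=2, keepdim=True)
_v = (_mag_sq / (1.0 + _mag_sq)) * (_s / torch.sqrt(_mag_sq))
print(torch.norm(_v, dim=2))  # ~0.0099 and ~0.9901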
##############################################################################
# Message Functions
# ..................
# At the first stage, we define a message function to gather all the attributes we need
# for the subsequent computations.
def capsule_msg(src, edge):
    return {'b_ij': edge['b_ij'], 'h': src['h'], 'u_hat': edge['u_hat']}
##############################################################################
# Reduce Functions
# ..................
# At this stage, we define a reduce function to aggregate all the information received
# from the message function into node features.
# This step implements line 4 and line 5 of the routing algorithm, which apply a softmax over
# :math:`b_{ij}` and compute the weighted sum of the input features.
#
# .. note::
#    The softmax operation is over dimension :math:`j` instead of :math:`i`.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f5.png
#
def capsule_reduce(node, msg):
    b_ij_c, u_hat = msg['b_ij'], msg['u_hat']
    # line 4
    c_i = F.softmax(b_ij_c, dim=0)
...@@ -162,10 +176,10 @@ class DGLBatchCapsuleLayer(nn.Module):
# ...........................
# Squash the intermediate representations into node features :math:`v_j`
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step6.png
#
def capsule_update(msg):
    v_j = squash(msg['h'])
    return {'h': v_j}
##############################################################################
...@@ -173,17 +187,17 @@ class DGLBatchCapsuleLayer(nn.Module):
# ..........................
# Update the routing parameters
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step7.png
#
def update_edge(u, v, edge):
    return {'b_ij': edge['b_ij'] + (v['h'] * edge['u_hat']).mean(dim=1).sum(dim=1)}
##############################################################################
# Executing algorithm
# .....................
# Call the `update_all` and `update_edge` functions to execute the algorithm.
def routing(self):
    for i in range(self.num_routing):
        self.g.update_all(capsule_msg, capsule_reduce, capsule_update)
        self.g.update_edge(edge_func=update_edge)
DGLBatchCapsuleLayer.routing = routing
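##############################################################################
# A hypothetical usage sketch. The constructor arguments below are assumptions (the
# full class definition is elided in this excerpt); they are shown, commented out,
# only to indicate how the assembled layer would be driven.
# layer = DGLBatchCapsuleLayer(input_capsule_dim=8, input_capsule_num=1152,
#                              output_capsule_dim=16, output_capsule_num=10,
#                              num_routing=3, device='cpu')
# out = layer(torch.randn(4, 8, 1152))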