Unverified Commit 9fd94b53 authored by Minjie Wang, committed by GitHub

fix capsule doc (#104)

parent 53e7f731
"""
Your first example in DGL
=========================
TODO: either a pagerank or SSSP example
"""
###############################################################################
# Create a DGLGraph
# -----------------
#
# To start, let's first import dgl
import dgl
"""
Use DGLGraph
============
In this tutorial, we introduce how to use our graph class -- ``DGLGraph``.
``DGLGraph`` is the core data structure in our library. It provides the basic
interfaces to manipulate the graph structure, set/get node/edge features, and convert
from/to many other graph formats. You can also perform computation on the graph
using our message passing APIs. (TODO: give a link here to the message passing doc)
"""
###############################################################################
# Construct a graph
# -----------------
#
# In ``DGLGraph``, all nodes are represented using consecutive integers starting from
# zero. All edges are directed. Let us start by creating a star network of 10 nodes
# where all the edges point to the center node (node#0).
# TODO(minjie): it's better to plot the graph here.
import dgl
star = dgl.DGLGraph()
star.add_nodes(10) # add 10 nodes
for i in range(1, 10):
    star.add_edge(i, 0)
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
###############################################################################
# ``DGLGraph`` also supports adding multiple edges at once by providing multiple
# source and destination nodes. Multiple nodes are represented using either a
# list or a 1D integer tensor (vector). In addition, we also support
# "edge broadcasting":
#
# .. note::
#
#    Given source and destination node lists/tensors ``u`` and ``v``:
#
#    - If ``len(u) == len(v)``, this is a many-many edge set and each edge
#      is represented by ``(u[i], v[i])``.
#    - If ``len(u) == 1``, this is a one-many edge set.
#    - If ``len(v) == 1``, this is a many-one edge set.
#
# Edge broadcasting is supported in many APIs whenever a bunch of edges need
# to be specified. The example below creates the same star graph as the previous one.
star.clear() # clear the previous graph
star.add_nodes(10)
u = list(range(1, 10)) # can also use tensor type here (e.g. torch.Tensor)
star.add_edges(u, 0) # many-one edge set
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
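###############################################################################
# For completeness, here is a small sketch of the other two broadcasting forms on a
# separate toy graph (the node/edge choices below are made up just for illustration).
demo = dgl.DGLGraph()
demo.add_nodes(4)
demo.add_edges([0, 1], [2, 3])  # many-many: adds edges (0, 2) and (1, 3)
demo.add_edges(3, [0, 1])       # one-many: adds edges (3, 0) and (3, 1)
print('#Edges:', demo.number_of_edges())  # 2 + 2 = 4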
###############################################################################
# In ``DGLGraph``, each edge is assigned an internal edge id (also a consecutive
# integer starting from zero). The ids follow the addition order of the edges
# and you can query the id using the ``edge_ids`` interface.
print(star.edge_ids(1, 0)) # the first edge
print(star.edge_ids([8, 9], 0)) # ask for ids of multiple edges
###############################################################################
# Assigning consecutive integer ids for nodes and edges makes it easier to batch
# their features together (see next section). As a result, removing nodes or edges
# of a ``DGLGraph`` is currently not supported because this will break the assumption
# that the ids form a consecutive range from zero.
###############################################################################
# Node and edge features
# ----------------------
# Nodes and edges can have feature data of tensor type. They can be accessed and updated
# through a key-value storage interface. The key must be hashable. The value should
# be the features of each node or edge, batched on the *first* dimension. For example,
# the following code creates features for all nodes (``hv``) and features for all
# edges (``he``). Each feature is a vector of length 3.
#
# .. note::
#
#    The first dimension is usually reserved as the batch dimension in DGL. Thus, even when
#    setting features for only one node/edge, the data still needs an extra dimension (of length one).
import torch as th
D = 3 # the feature dimension
N = star.number_of_nodes()
M = star.number_of_edges()
nfeat = th.randn((N, D)) # some random node features
efeat = th.randn((M, D)) # some random edge features
# TODO(minjie): enable following syntax
# star.nodes[:]['hv'] = nfeat
# star.edges[:]['he'] = efeat
star.set_n_repr({'hv' : nfeat})
star.set_e_repr({'he' : efeat})
###############################################################################
# We can then set some nodes' features to be zero.
# TODO(minjie): enable following syntax
# print(star.nodes[:]['hv'])
print(star.get_n_repr()['hv'])
# set node 0, 2, 4 feature to zero
star.set_n_repr({'hv' : th.zeros((3, D))}, [0, 2, 4])
print(star.get_n_repr()['hv'])
###############################################################################
# Once created, each node/edge feature is associated with a *scheme* containing
# the shape and dtype information of the feature tensor. Updating features using data
# of a different scheme will raise an error unless all the features are updated at once,
# in which case the scheme will be replaced with the new one.
print(star.node_attr_schemes())
# updating features with a different scheme will raise an error
# star.set_n_repr({'hv' : th.zeros((3, 2*D))}, [0, 2, 4])
# updating all the nodes is fine; the old scheme will be replaced
star.set_n_repr({'hv' : th.zeros((N, 2*D))})
print(star.node_attr_schemes())
###############################################################################
# If a new feature is added for some but not all of the nodes/edges, we will
# automatically create empty features for the others to make sure that features are
# always aligned. By default, the empty features are filled with zeros. This behavior
# can be changed using ``set_n_initializer`` and ``set_e_initializer``.
star.set_n_repr({'hv_1' : th.randn((3, D+1))}, [0, 2, 4])
print(star.node_attr_schemes())
print(star.get_n_repr()['hv_1'])
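###############################################################################
# A minimal sketch of swapping the default initializer. The exact callable signature
# expected by ``set_n_initializer`` is an assumption here (check the API reference);
# the point is only to show where the hook goes, so the calls are left commented out.
def ones_initializer(shape, dtype):
    # assumed signature: receives the shape and dtype of the feature tensor to create
    return th.ones(shape, dtype=dtype)
# star.set_n_initializer(ones_initializer)
# star.set_n_repr({'hv_2' : th.randn((3, D))}, [0, 2, 4])
# print(star.get_n_repr()['hv_2'])  # untouched nodes would now be filled with ones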
###############################################################################
# Convert from/to other formats
# -----------------------------
# A ``DGLGraph`` can be easily converted from/to a ``networkx`` graph.
import networkx as nx
# note that networkx creates undirected graphs by default, so when converting
# to DGLGraph, directed edges in both directions will be added.
nx_star = nx.star_graph(9)
star = dgl.DGLGraph(nx_star)
print('#Nodes:', star.number_of_nodes())
print('#Edges:', star.number_of_edges())
###############################################################################
# Node and edge attributes can be automatically batched when converting from a
# ``networkx`` graph. Since a ``networkx`` graph by default does not record which
# edge was added first, we use the ``"id"`` edge attribute as a hint
# if available.
for i in range(10):
    nx_star.nodes[i]['feat'] = th.randn((D,))
star = dgl.DGLGraph()
star.from_networkx(nx_star, node_attrs=['feat']) # auto-batch specified node features
print(star.get_n_repr()['feat'])
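###############################################################################
# Edge attributes can be batched the same way. The sketch below attaches an explicit
# ``"id"`` attribute so the edge order is unambiguous, plus a feature ``w``; the
# ``edge_attrs`` argument is assumed to mirror ``node_attrs``.
for k, (src, dst) in enumerate(nx_star.edges):
    nx_star.edges[src, dst]['id'] = k
    nx_star.edges[src, dst]['w'] = th.randn((D,))
star2 = dgl.DGLGraph()
star2.from_networkx(nx_star, node_attrs=['feat'], edge_attrs=['w'])
print(star2.get_e_repr()['w'].shape)  # one row per directed edge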
###############################################################################
# Multi-edge graph
# ----------------
# Many applications work on graphs containing multi-edges. To enable
# them, construct a ``DGLGraph`` with ``multigraph=True``.
g = dgl.DGLGraph(multigraph=True)
g.add_nodes(5)
g.add_edge(0, 1)
g.add_edge(1, 2)
g.add_edge(0, 1)
print('#Nodes:', g.number_of_nodes())
print('#Edges:', g.number_of_edges())
# init random edge features
M = g.number_of_edges()
g.set_e_repr({'he' : th.randn((M, D))})
###############################################################################
# Because an edge in a multigraph cannot be uniquely identified by its incident
# nodes ``u`` and ``v``, you need to use edge ids to access edge features. The
# edge ids can be queried via the ``edge_id`` interface.
eid_01 = g.edge_id(0, 1)
print(eid_01)
###############################################################################
# We can then use the edge id to set/get the features of the corresponding edge.
g.set_e_repr_by_id({'he' : th.ones(len(eid_01), D)}, eid=eid_01)
print(g.get_e_repr()['he'])
Model Tutorials
===============
Graph-based DNN models in DGL.
""" """
Capsule Network Capsule Network
================ ================
**Author**: `Jinjing Zhou` **Author**: `Jinjing Zhou`
This tutorial explains how to use DGL library and its language to implement the `capsule network <http://arxiv.org/abs/1710.09829>`__ proposed by Geoffrey Hinton and his team. The algorithm aims to provide a better alternative to current neural network structures. By using DGL library, users can implement the algorithm in a more intuitive way. This tutorial explains how to use DGL library and its language to implement the
`capsule network <http://arxiv.org/abs/1710.09829>`__ proposed by Geoffrey Hinton and his team.
The algorithm aims to provide a better alternative to current neural network structures.
By using DGL library, users can implement the algorithm in a more intuitive way.
""" """
############################################################################## ##############################################################################
# Model Overview # Model Overview
# --------------- # ---------------
# Introduction # Introduction
# ``````````````````` # ```````````````````
# Capsule Network were first introduced in 2011 by Geoffrey Hinton, et al., in paper `Transforming Autoencoders <https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf>`__, but it was only a few months ago, in November 2017, that Sara Sabour, Nicholas Frosst, and Geoffrey Hinton published a paper called Dynamic Routing between Capsules, where they introduced a CapsNet architecture that reached state-of-the-art performance on MNIST. # Capsule Network were first introduced in 2011 by Geoffrey Hinton, et al.,
# in paper `Transforming Autoencoders <https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf>`__,
# but it was only a few months ago, in November 2017, that Sara Sabour, Nicholas Frosst,
# and Geoffrey Hinton published a paper called Dynamic Routing between Capsules, where they
# introduced a CapsNet architecture that reached state-of-the-art performance on MNIST.
# #
# What's a capsule? # What's a capsule?
# ``````````````````` # ```````````````````
# In papers, author states that "A capsule is a group of neurons whose activity vector represents the instantiation parameters of a specific type of entity such as an object or an object part." # In papers, author states that "A capsule is a group of neurons whose activity vector
# Generally Speaking, the idea of capsule is to encode all the information about the features into a vector form, by substituting scalars in traditional neural network with vectors. And use the norm of the vector to represents the meaning of original scalars. # represents the instantiation parameters of a specific type of entity such as an object
# or an object part."
# Generally Speaking, the idea of capsule is to encode all the information about the
# features into a vector form, by substituting scalars in traditional neural network with vectors.
# And use the norm of the vector to represents the meaning of original scalars.
# #
# .. image:: /_static/capsule_f1.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f1.png
# #
# Dynamic Routing Algorithm # Dynamic Routing Algorithm
# ````````````````````````````` # `````````````````````````````
# Due to the different structure of network, capsules network has different operations to calculate results. This figure shows the comparison, drawn by `Max Pechyonkin <https://medium.com/ai%C2%B3-theory-practice-business/understanding-hintons-capsule-networks-part-ii-how-capsules-work-153b6ade9f66O>`__ # Due to the different structure of network, capsules network has different operations to
# calculate results. This figure shows the comparison, drawn by
# `Max Pechyonkin <https://medium.com/ai%C2%B3-theory-practice-business/understanding-hintons-capsule-networks-part-ii-how-capsules-work-153b6ade9f66O>`__
# #
# .. image:: /_static/capsule_f2.png # .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f2.png
# :height: 250px # :height: 250px
# #
# The key idea is that the output of each capsule is the sum of weighted input vectors. We will go into details in the later section with code implementations. # The key idea is that the output of each capsule is the sum of weighted input vectors.
# We will go into details in the later section with code implementations.
# #
# Model Implementations # Model Implementations
# ------------------------- # -------------------------
...@@ -61,10 +73,10 @@ class DGLBatchCapsuleLayer(nn.Module):
# ````````````````````````````````````````````````````````````````````````````
# We can consider each capsule as a node in a graph, and connect all the nodes between layers.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f3.png
#    :height: 200px
#
def construct_graph(self):
    g = dgl.DGLGraph()
    g.add_nodes(self.input_capsule_num + self.output_capsule_num)
    input_nodes = list(range(self.input_capsule_num))
...@@ -76,6 +88,7 @@ class DGLBatchCapsuleLayer(nn.Module):
            v.append(j)
    g.add_edges(u, v)
    return g, input_nodes, output_nodes
DGLBatchCapsuleLayer.construct_graph = construct_graph  # This line is for defining the class in multiple cells.
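##############################################################################
# To make the connectivity concrete, here is a tiny standalone sketch (not part of
# the original model) that builds the same fully-connected bipartite pattern for
# 3 input capsules and 2 output capsules.
in_caps, out_caps = 3, 2
toy = dgl.DGLGraph()
toy.add_nodes(in_caps + out_caps)
u, v = [], []
for i in range(in_caps):
    for j in range(in_caps, in_caps + out_caps):
        u.append(i)
        v.append(j)
toy.add_edges(u, v)
print('#Edges in the toy capsule graph:', toy.number_of_edges())  # 3 * 2 = 6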
##############################################################################
# Initialization & Affine Transformation
...@@ -83,9 +96,9 @@ class DGLBatchCapsuleLayer(nn.Module):
# - Pre-compute :math:`\hat{u}_{j|i}`, initialize :math:`b_{ij}` and store them as edge attributes
# - Initialize node features as zeros
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f4.png
#
def forward(self, x):
    self.batch_size = x.size(0)
    # x is the input vector with shape [batch_size, input_capsule_dim, input_num]
    # Transpose x to [batch_size, input_num, input_capsule_dim]
...@@ -109,19 +122,20 @@ class DGLBatchCapsuleLayer(nn.Module):
    node_features = torch.zeros(self.input_capsule_num + self.output_capsule_num, self.batch_size,
                                self.output_capsule_dim).to(self.device)
    self.g.set_n_repr({'h': node_features})
DGLBatchCapsuleLayer.forward = forward
##############################################################################
# Write Message Passing functions and Squash function
# ````````````````````````````````````````````````````````````````````````````
# Squash function
# ..................
# The squashing function ensures that short vectors get shrunk to almost zero length and
# long vectors get shrunk to a length slightly below 1.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/squash.png
#    :height: 100px
#
def squash(s):
    mag_sq = torch.sum(s ** 2, dim=2, keepdim=True)
    mag = torch.sqrt(mag_sq)
    s = (mag_sq / (1.0 + mag_sq)) * (s / mag)
...@@ -131,25 +145,25 @@ class DGLBatchCapsuleLayer(nn.Module):
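##############################################################################
# A quick numerical check of the squashing behaviour (a self-contained sketch,
# assuming ``torch`` is imported as in the rest of this file): short vectors are
# driven toward zero length, long vectors toward a length just below 1.
_s = torch.tensor([[[0.1, 0.0, 0.0]], [[10.0, 0.0, 0.0]]])   # shape (2, 1, 3)
_mag_sq = torch.sum(_s ** 2, dim=2, keepdim=True)
_v = (_mag_sq / (1.0 + _mag_sq)) * (_s / torch.sqrt(_mag_sq))
print(torch.norm(_v, dim=2))  # ~0.0099 and ~0.9901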
##############################################################################
# Message Functions
# ..................
# At the first stage, we define a message function to gather all the attributes we need
# for the subsequent computations.
def capsule_msg(src, edge):
    return {'b_ij': edge['b_ij'], 'h': src['h'], 'u_hat': edge['u_hat']}
##############################################################################
# Reduce Functions
# ..................
# At this stage, we define a reduce function to aggregate all the information received
# from the message function into node features.
# This step implements line 4 and line 5 of the routing algorithm, which apply a softmax over
# :math:`b_{ij}` and compute the weighted sum of the input features.
#
# .. note::
#    The softmax operation is over dimension :math:`j` instead of :math:`i`.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f5.png
#
def capsule_reduce(node, msg):
    b_ij_c, u_hat = msg['b_ij'], msg['u_hat']
    # line 4
    c_i = F.softmax(b_ij_c, dim=0)
...@@ -162,10 +176,10 @@ class DGLBatchCapsuleLayer(nn.Module):
# ...........................
# Squash the intermediate representations into node features :math:`v_j`
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step6.png
#
def capsule_update(msg):
    v_j = squash(msg['h'])
    return {'h': v_j}
##############################################################################
...@@ -173,17 +187,17 @@ class DGLBatchCapsuleLayer(nn.Module):
# ..........................
# Update the routing parameters
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step7.png
#
def update_edge(u, v, edge):
    return {'b_ij': edge['b_ij'] + (v['h'] * edge['u_hat']).mean(dim=1).sum(dim=1)}
##############################################################################
# Executing algorithm
# .....................
# Call the `update_all` and `update_edge` functions to execute the algorithm.
def routing(self):
    for i in range(self.num_routing):
        self.g.update_all(capsule_msg, capsule_reduce, capsule_update)
        self.g.update_edge(edge_func=update_edge)
DGLBatchCapsuleLayer.routing = routing
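##############################################################################
# A hypothetical usage sketch. The constructor arguments below are assumptions (the
# full class definition is elided in this excerpt); they are shown, commented out,
# only to indicate how the assembled layer would be driven.
# layer = DGLBatchCapsuleLayer(input_capsule_dim=8, input_capsule_num=1152,
#                              output_capsule_dim=16, output_capsule_num=10,
#                              num_routing=3, device='cpu')
# out = layer(torch.randn(4, 8, 1152))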