OpenDAS / dgl, commit 79ceccef
Unverified commit, authored Nov 04, 2018 by Minjie Wang; committed by GitHub on Nov 04, 2018
[Doc] Fix missing file (#119)
parent 87ed21ec

Showing 2 changed files, with 417 additions and 0 deletions (+417 -0):

  tutorials/models/1_gcn.py      +163 -0
  tutorials/models/2_capsule.py  +254 -0
tutorials/models/1_gcn.py  0 → 100644
"""
.. _model-gcn:
Graph Convolutional Network
====================================
**Author:** Qi Huang, `Minjie Wang <https://jermainewang.github.io/>`_,
Yu Gai, Quan Gan, Zheng Zhang
This is a gentle introduction of using DGL to implement Graph Convolutional
Networks (Kipf & Welling et al., `Semi-Supervised Classificaton with Graph
Convolutional Networks <https://arxiv.org/pdf/1609.02907.pdf>`_). We build upon
the :doc:`earlier tutorial <../3_pagerank>` on DGLGraph and demonstrate
how DGL combines graph with deep neural network and learn structural representations.
"""
###############################################################################
# Model Overview
# ------------------------------------------
# GCN from the perspective of message passing
# ```````````````````````````````````````````````
# We describe a layer of the graph convolutional network from a message
# passing perspective; the math can be found `here <math_>`_.
# It boils down to the following steps, for each node :math:`u`:
#
# 1) Aggregate neighbors' representations :math:`h_{v}` to produce an
#    intermediate representation :math:`\hat{h}_u`.
# 2) Transform the aggregated representation :math:`\hat{h}_{u}` with a
#    linear projection followed by a non-linearity:
#    :math:`h_{u} = f(W_{u} \hat{h}_u)`.
#
# We will implement step 1 with DGL message passing, and step 2 with the
# ``apply_nodes`` method, whose node UDF will be a PyTorch ``nn.Module``.
#
# GCN implementation with DGL
# ``````````````````````````````````````````
# We first define the message and reduce functions as usual. Since the
# aggregation on a node :math:`u` only involves summing over the neighbors'
# representations :math:`h_v`, we can simply use builtin functions:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

gcn_msg = fn.copy_src(src='h', out='m')
gcn_reduce = fn.sum(msg='m', out='h')
###############################################################################
# We then define the node UDF for ``apply_nodes``, which is a fully-connected layer:
class NodeApplyModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h': h}
###############################################################################
# We then proceed to define the GCN module. A GCN layer essentially performs
# message passing on all the nodes, then applies the ``NodeApplyModule``. Note
# that we omit the dropout used in the paper for simplicity.
class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(gcn_msg, gcn_reduce)
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')
###############################################################################
# The forward function is essentially the same as that of any other commonly
# seen NN model in PyTorch. We can initialize GCN like any ``nn.Module``. For
# example, let's define a simple neural network consisting of two GCN layers.
# Suppose we are training a classifier for the Cora dataset (whose input
# feature size is 1433 and number of classes is 7).
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.gcn1 = GCN(1433, 16, F.relu)
        self.gcn2 = GCN(16, 7, F.relu)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        x = self.gcn2(g, x)
        return x

net = Net()
print(net)
###############################################################################
# We load the Cora dataset using DGL's built-in data module.
from dgl.data import citation_graph as citegrh

def load_cora_data():
    data = citegrh.load_cora()
    features = th.FloatTensor(data.features)
    labels = th.LongTensor(data.labels)
    mask = th.ByteTensor(data.train_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, mask
###############################################################################
# We then train the network as follows:
import time
import numpy as np

g, features, labels, mask = load_cora_data()
optimizer = th.optim.Adam(net.parameters(), lr=1e-3)
dur = []
for epoch in range(30):
    if epoch >= 3:
        t0 = time.time()

    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[mask], labels[mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format(
        epoch, loss.item(), np.mean(dur)))
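###############################################################################
# As a quick follow-up, we can check the accuracy of the trained model. This
# is a minimal sketch that reuses the training mask for illustration; a
# held-out test mask from the dataset would work the same way.
net.eval()
with th.no_grad():
    logits = net(g, features)
    # Predicted class is the argmax over the 7 output logits per node.
    _, pred = th.max(logits, dim=1)
    acc = (pred[mask] == labels[mask]).float().mean().item()
print('Accuracy on masked nodes {:.4f}'.format(acc))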
###############################################################################
# .. _math:
#
# GCN in one formula
# ------------------
# Mathematically, the GCN model follows this formula:
#
# :math:`H^{(l+1)} = \sigma(\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}H^{(l)}W^{(l)})`
#
# Here, :math:`H^{(l)}` denotes the node representations at the :math:`l^{th}`
# layer of the network, :math:`\sigma` is the non-linearity, and :math:`W` is
# the weight matrix for this layer. :math:`D` and :math:`A`, as commonly seen,
# represent the degree matrix and the adjacency matrix, respectively. The
# tilde denotes a renormalization trick in which we add a self-connection to
# each node of the graph, and build the corresponding degree and adjacency
# matrices. The shape of the input :math:`H^{(0)}` is :math:`N \times D`,
# where :math:`N` is the number of nodes and :math:`D` is the number of input
# features. We can chain up multiple layers as such to produce a node-level
# representation output with shape :math:`N \times F`, where :math:`F` is the
# dimension of the output node feature vector.
#
# The equation can be efficiently implemented using sparse matrix
# multiplication kernels (such as Kipf's
# `pygcn <https://github.com/tkipf/pygcn>`_ code). The above DGL
# implementation in fact already uses this trick, thanks to the builtin
# functions. To understand what is under the hood, please read our tutorial
# on :doc:`PageRank <3_pagerank>`.
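###############################################################################
# As a sanity check, here is a minimal dense-matrix sketch of the formula
# above on a toy 4-node path graph. The tensors ``A``, ``H``, and ``W`` below
# are made up for illustration and are not part of the tutorial model; a real
# implementation would use sparse kernels instead of dense matmuls.

# Toy adjacency matrix and random input features / layer weight.
A = th.tensor([[0., 1., 0., 0.],
               [1., 0., 1., 0.],
               [0., 1., 0., 1.],
               [0., 0., 1., 0.]])
H = th.randn(4, 5)                                   # N x D input features
W = th.randn(5, 3)                                   # D x F layer weight
A_tilde = A + th.eye(4)                              # add self-connections
D_inv_sqrt = th.diag(A_tilde.sum(dim=1).pow(-0.5))   # \tilde{D}^{-1/2}
H_next = F.relu(D_inv_sqrt @ A_tilde @ D_inv_sqrt @ H @ W)
print(H_next.shape)                                  # torch.Size([4, 3]), i.e. N x F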
tutorials/models/2_capsule.py  0 → 100644
"""
.. _model-capsule:
Capsule Network
================
**Author**: `Jinjing Zhou`
This tutorial explains how to use DGL library and its language to implement the
`capsule network <http://arxiv.org/abs/1710.09829>`__ proposed by Geoffrey
Hinton and his team. The algorithm aims to provide a better alternative to
current neural network structures. By using DGL library, users can implement
the algorithm in a more intuitive way.
"""
##############################################################################
# Model Overview
# ---------------
# Introduction
# ```````````````````
# Capsule networks were first introduced in 2011 by Geoffrey Hinton et al. in
# the paper `Transforming Autoencoders
# <https://www.cs.toronto.edu/~fritz/absps/transauto6.pdf>`__, but it was only
# in November 2017 that Sara Sabour, Nicholas Frosst, and Geoffrey Hinton
# published a paper called Dynamic Routing Between Capsules, where they
# introduced a CapsNet architecture that reached state-of-the-art performance
# on MNIST.
#
# What's a capsule?
# ```````````````````
# In the paper, the authors state that "A capsule is a group of neurons whose
# activity vector represents the instantiation parameters of a specific type
# of entity such as an object or an object part."
#
# Generally speaking, the idea of capsules is to encode all the information
# about a feature in vector form, substituting the scalars of a traditional
# neural network with vectors, and to use the norm of each vector to
# represent what the original scalar expressed.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f1.png
#
# Dynamic Routing Algorithm
# `````````````````````````````
# Due to its different structure, a capsule network computes its results with
# different operations. This figure shows the comparison, drawn by
# `Max Pechyonkin
# <https://medium.com/ai%C2%B3-theory-practice-business/understanding-hintons-capsule-networks-part-ii-how-capsules-work-153b6ade9f66>`__
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f2.png
#    :height: 250px
#
# The key idea is that the output of each capsule is a weighted sum of its
# input vectors. We will go into the details in the later sections, together
# with the code implementation.
#
# Model Implementation
# -------------------------
##############################################################################
# Algorithm Overview
# ```````````````````````````
#
# .. image:: https://raw.githubusercontent.com/VoVAllen/DGL_Capsule/master/algorithm.png
#
# The main steps of the routing algorithm are lines 4 - 7, spelled out below.
# In the ``DGLGraph`` structure, we treat these steps as a message passing
# procedure.
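##############################################################################
# Concretely, following the notation of the paper (with
# :math:`\hat{u}_{j|i}` the prediction vector from input capsule :math:`i`
# to output capsule :math:`j`), lines 4 - 7 compute:
#
# .. math::
#
#    c_{ij} = \frac{\exp(b_{ij})}{\sum_k \exp(b_{ik})}, \quad
#    s_j = \sum_i c_{ij}\hat{u}_{j|i}, \quad
#    v_j = \mathrm{squash}(s_j), \quad
#    b_{ij} \leftarrow b_{ij} + \hat{u}_{j|i}\cdot v_j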
##############################################################################
# Consider capsule routing as a graph structure
# ````````````````````````````````````````````````````````````````````````````
# We can consider each capsule as a node in a graph, and fully connect the
# nodes between consecutive layers.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f3.png
#    :height: 150px
#
def construct_graph(self):
    g = dgl.DGLGraph()
    g.add_nodes(self.input_capsule_num + self.output_capsule_num)
    input_nodes = list(range(self.input_capsule_num))
    output_nodes = list(range(self.input_capsule_num,
                              self.input_capsule_num + self.output_capsule_num))
    u, v = [], []
    for i in input_nodes:
        for j in output_nodes:
            u.append(i)
            v.append(j)
    g.add_edges(u, v)
    return g, input_nodes, output_nodes
##############################################################################
# Write Message Passing Functions
# ``````````````````````````````````
# Reduce Functions (lines 4 - 5)
# .............................................
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f5.png
#
# At this stage, we define a reduce function that aggregates the node
# features from layer :math:`l` and combines them, via the weighted sum,
# into layer :math:`(l+1)`'s node features.
#
# .. note::
#    The softmax operation is over dimension :math:`j` instead of :math:`i`.
def capsule_reduce(node, msg):
    b_ij_c, u_hat = msg['b_ij'], msg['u_hat']
    # line 4
    c_i = F.softmax(b_ij_c, dim=0)
    # line 5
    s_j = (c_i.unsqueeze(2).unsqueeze(3) * u_hat).sum(dim=1)
    return {'h': s_j}
##############################################################################
# Node Update Functions (line 6)
# ......................................................
# Squash the intermediate representations into the node features :math:`v_j`:
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step6.png
#
def capsule_update(msg):
    v_j = squash(msg['h'])
    return {'h': v_j}
##############################################################################
# Edge Update Functions (line 7)
# ...........................................................................
# Update the routing parameters by updating the edges in the graph:
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/step7.png
#
def update_edge(u, v, edge):
    return {'b_ij': edge['b_ij'] + (v['h'] * edge['u_hat']).mean(dim=1).sum(dim=1)}
##############################################################################
# Call DGL functions to execute the algorithm
# ````````````````````````````````````````````````````````````````````````````
# Call the ``update_all`` and ``update_edge`` functions to execute the whole
# algorithm. The message function defines which attributes are needed in the
# subsequent computations.
#
def routing(self):
    def capsule_msg(src, edge):
        return {'b_ij': edge['b_ij'], 'h': src['h'], 'u_hat': edge['u_hat']}
    self.g.update_all(capsule_msg, capsule_reduce, capsule_update)
    self.g.update_edge(edge_func=update_edge)
##############################################################################
# Forward Function
# ````````````````````````````````````````````````````````````````````````````
# This section shows the whole forward pass of the capsule routing algorithm.
def forward(self, x):
    self.batch_size = x.size(0)
    u_hat = self.compute_uhat(x)
    self.initialize_nodes_and_edges_features(u_hat)
    for i in range(self.num_routing):
        self.routing()
    this_layer_nodes_feature = self.g.get_n_repr()['h'][
        self.input_capsule_num:self.input_capsule_num + self.output_capsule_num]
    return this_layer_nodes_feature.transpose(0, 1).unsqueeze(1).unsqueeze(4).squeeze(1)
##############################################################################
# Other Components
# ````````````````````````````````````````````````````````````````
# Initialization & Affine Transformation
# ..................................................
# This section implements the transformation operation in capsule networks,
# which transforms capsules into different dimensions:
#
# - Pre-compute :math:`\hat{u}_{j|i}`, initialize :math:`b_{ij}`, and store
#   them as edge attributes
# - Initialize node features as zero
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/capsule_f4.png
#
def compute_uhat(self, x):
    # x is the input vector with shape [batch_size, input_capsule_dim, input_num]
    # Transpose x to [batch_size, input_num, input_capsule_dim]
    x = x.transpose(1, 2)
    # Expand x to [batch_size, input_num, output_num, input_capsule_dim, 1]
    x = torch.stack([x] * self.output_capsule_num, dim=2).unsqueeze(4)
    # Expand W from [input_num, output_num, output_capsule_dim, input_capsule_dim]
    # to [batch_size, input_num, output_num, output_capsule_dim, input_capsule_dim]
    W = self.weight.expand(self.batch_size, *self.weight.size())
    # u_hat's shape is [input_num, output_num, batch_size, output_capsule_dim]
    u_hat = torch.matmul(W, x).permute(1, 2, 0, 3, 4).squeeze().contiguous()
    return u_hat
def initialize_nodes_and_edges_features(self, u_hat):
    b_ij = torch.zeros(self.input_capsule_num, self.output_capsule_num).to(self.device)
    self.g.set_e_repr({'b_ij': b_ij.view(-1)})
    self.g.set_e_repr({'u_hat': u_hat.view(-1, self.batch_size, self.output_capsule_dim)})
    # Initialize all node features as zero
    node_features = torch.zeros(self.input_capsule_num + self.output_capsule_num,
                                self.batch_size, self.output_capsule_dim).to(self.device)
    self.g.set_n_repr({'h': node_features})
##############################################################################
# Squash function
# ..................
# The squashing function ensures that short vectors shrink to almost zero
# length while long vectors shrink to a length slightly below 1, so that the
# norm of a capsule's output can be interpreted as a probability.
#
# .. image:: https://raw.githubusercontent.com/dmlc/web-data/master/dgl/tutorials/capsule/squash.png
#    :height: 100px
#
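# In formula form (as in the paper):
#
# .. math::
#
#    v_j = \frac{\|s_j\|^2}{1 + \|s_j\|^2}\,\frac{s_j}{\|s_j\|}
#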
def squash(s, dim=2):
    sq = torch.sum(s ** 2, dim=dim, keepdim=True)
    s_std = torch.sqrt(sq)
    s = (sq / (1.0 + sq)) * (s / s_std)
    return s
##############################################################################
# General Setup
# .................
import dgl
import torch
import torch.nn.functional as F
from torch import nn


class DGLDigitCapsuleLayer(nn.Module):
    def __init__(self, input_capsule_dim=8, input_capsule_num=1152,
                 output_capsule_num=10, output_capsule_dim=16,
                 num_routing=3, device='cpu'):
        super(DGLDigitCapsuleLayer, self).__init__()
        self.device = device
        self.input_capsule_dim = input_capsule_dim
        self.input_capsule_num = input_capsule_num
        self.output_capsule_dim = output_capsule_dim
        self.output_capsule_num = output_capsule_num
        self.num_routing = num_routing
        self.weight = nn.Parameter(torch.randn(input_capsule_num, output_capsule_num,
                                               output_capsule_dim, input_capsule_dim))
        self.g, self.input_nodes, self.output_nodes = self.construct_graph()


# These assignments let us define the class across multiple cells.
DGLDigitCapsuleLayer.construct_graph = construct_graph
DGLDigitCapsuleLayer.forward = forward
DGLDigitCapsuleLayer.routing = routing
DGLDigitCapsuleLayer.compute_uhat = compute_uhat
DGLDigitCapsuleLayer.initialize_nodes_and_edges_features = initialize_nodes_and_edges_features
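##############################################################################
# Finally, a quick smoke test (illustrative only; the random tensor below
# stands in for real primary-capsule outputs and follows the default MNIST
# CapsNet shapes above):
layer = DGLDigitCapsuleLayer()
x = torch.randn(4, 8, 1152)  # [batch_size, input_capsule_dim, input_capsule_num]
out = layer(x)
print(out.shape)  # expected [batch_size, output_capsule_num, output_capsule_dim, 1]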