"tests/python/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "ae4a5b739412d817da36b86c858f00e9605022a9"
Commit 16cc5287 authored by VoVAllen, committed by Minjie Wang

[Doc] Improve Capsule with Jinyang & Fix wrong tutorial level layout (#236)

* improve capsule tutorial with jinyang

* fix wrong layout of second-level tutorial

* delete transformer
parent dafe4671
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config

# -- Path setup --------------------------------------------------------------

# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../python'))

# -- Project information -----------------------------------------------------

project = 'DGL'
copyright = '2018, DGL Team'
author = 'DGL Team'

# The short X.Y version
version = '0.0.1'
# The full version, including alpha/beta/rc tags
release = '0.0.1'

# -- General configuration ---------------------------------------------------

# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'

# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    'sphinx.ext.autodoc',
    'sphinx.ext.autosummary',
    'sphinx.ext.coverage',
    'sphinx.ext.mathjax',
    'sphinx.ext.napoleon',
    'sphinx.ext.viewcode',
    'sphinx.ext.intersphinx',
    'sphinx.ext.graphviz',
    'sphinx_gallery.gen_gallery',
]

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
source_suffix = ['.rst', '.md']

# The master toctree document.
master_doc = 'index'

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None

# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []

# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None

# -- Options for HTML output -------------------------------------------------

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'

# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}

# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}

# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = 'dgldoc'

# -- Options for LaTeX output ------------------------------------------------

latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',

    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',

    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',

    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
}

# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
#  author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, 'dgl.tex', 'DGL Documentation',
     'DGL Team', 'manual'),
]

# -- Options for manual page output ------------------------------------------

# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
    (master_doc, 'dgl', 'DGL Documentation',
     [author], 1)
]

# -- Options for Texinfo output ----------------------------------------------

# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
#  dir menu entry, description, category)
texinfo_documents = [
    (master_doc, 'dgl', 'DGL Documentation',
     author, 'dgl', 'Library for deep learning on graphs.',
     'Miscellaneous'),
]

# -- Options for Epub output -------------------------------------------------

# Bibliographic Dublin Core info.
epub_title = project

# The unique identifier of the text. This can be an ISBN number
# or the project homepage.
#
# epub_identifier = ''

# A unique identification for the text.
#
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']

# -- Extension configuration -------------------------------------------------

autosummary_generate = True

intersphinx_mapping = {
    'python': ('https://docs.python.org/{.major}'.format(sys.version_info), None),
    'numpy': ('http://docs.scipy.org/doc/numpy/', None),
    'scipy': ('http://docs.scipy.org/doc/scipy/reference', None),
    'matplotlib': ('http://matplotlib.org/', None),
    'networkx': ('https://networkx.github.io/documentation/stable', None),
}

# sphinx gallery configurations
from sphinx_gallery.sorting import FileNameSortKey
examples_dirs = ['../../tutorials/basics', '../../tutorials/models']  # path to find sources
gallery_dirs = ['tutorials/basics', 'tutorials/models']  # path to generate docs
reference_url = {
    'dgl': None,
    'numpy': 'http://docs.scipy.org/doc/numpy/',
    'scipy': 'http://docs.scipy.org/doc/scipy/reference',
    'matplotlib': 'http://matplotlib.org/',
    'networkx': 'https://networkx.github.io/documentation/stable',
}
sphinx_gallery_conf = {
    'backreferences_dir': 'generated/backreferences',
    'doc_module': ('dgl', 'numpy'),
    'examples_dirs': examples_dirs,
    'gallery_dirs': gallery_dirs,
    'within_subsection_order': FileNameSortKey,
    'filename_pattern': '.py',
}
@@ -65,7 +65,8 @@ credit, see `here <https://www.dgl.ai/ack>`_.
   :caption: Tutorials
   :glob:

   tutorials/basics/index
   tutorials/models/index

.. toctree::
   :maxdepth: 2
...
"""
.. currentmodule:: dgl

DGL Basics
==========

**Author**: `Minjie Wang <https://jermainewang.github.io/>`_, Quan Gan, Yu Gai,
Zheng Zhang

The goal of this tutorial:

* To create a graph.
* To read and write node and edge representations.
"""
###############################################################################
# Graph Creation
# --------------
# The design of :class:`DGLGraph` was influenced by other graph libraries. Indeed,
# you can create a graph from networkx and convert it into a :class:`DGLGraph` and
# vice versa:

import networkx as nx
import dgl

g_nx = nx.petersen_graph()
g_dgl = dgl.DGLGraph(g_nx)

import matplotlib.pyplot as plt
plt.subplot(121)
nx.draw(g_nx, with_labels=True)
plt.subplot(122)
nx.draw(g_dgl.to_networkx(), with_labels=True)
plt.show()

###############################################################################
# They are the same graph, except that :class:`DGLGraph` is *always* directed.
#
# One can also create a graph by calling DGL's own interface.
#
# Now let's build a star graph. :class:`DGLGraph` nodes are a consecutive range
# of integers between 0 and :func:`number_of_nodes() <DGLGraph.number_of_nodes>`,
# and the graph can grow by calling :func:`add_nodes <DGLGraph.add_nodes>`.
# :class:`DGLGraph` edges are in the order of their addition. Note that
# edges are accessed in much the same way as nodes, with one extra feature:
# *edge broadcasting*.

import dgl
import torch as th

g = dgl.DGLGraph()
g.add_nodes(10)
# a couple of edges, one by one
for i in range(1, 4):
    g.add_edge(i, 0)
# a few more with a paired list
src = list(range(5, 8)); dst = [0] * 3
g.add_edges(src, dst)
# finish with a pair of tensors
src = th.tensor([8, 9]); dst = th.tensor([0, 0])
g.add_edges(src, dst)

# edge broadcasting will build the star graph in one go!
g.clear(); g.add_nodes(10)
src = th.tensor(list(range(1, 10)))
g.add_edges(src, 0)

import networkx as nx
import matplotlib.pyplot as plt
nx.draw(g.to_networkx(), with_labels=True)
plt.show()
###############################################################################
# Feature Assignment
# ------------------
# One can also assign features to the nodes and edges of a :class:`DGLGraph`. The
# features are represented as a dictionary that maps names (strings) to tensors,
# called **fields**.
#
# The following code snippet assigns each node a vector (len=3).
#
# .. note::
#
#    DGL aims to be framework-agnostic, and currently it supports PyTorch and
#    MXNet tensors. From now on, we use PyTorch as an example.

import dgl
import torch as th

x = th.randn(10, 3)
g.ndata['x'] = x

###############################################################################
# :func:`ndata <DGLGraph.ndata>` is syntactic sugar for accessing the states of
# all nodes; the states are stored in a container ``data`` that hosts a
# user-defined dictionary.

print(g.ndata['x'] == g.nodes[:].data['x'])

# access node set with integer, list, or integer tensor
g.nodes[0].data['x'] = th.zeros(1, 3)
g.nodes[[0, 1, 2]].data['x'] = th.zeros(3, 3)
g.nodes[th.tensor([0, 1, 2])].data['x'] = th.zeros(3, 3)

###############################################################################
# Assigning edge features is similar to assigning node features,
# except that one can also do it by specifying the endpoints of the edges.

g.edata['w'] = th.randn(9, 2)

# access edge set with IDs in integer, list, or integer tensor
g.edges[1].data['w'] = th.randn(1, 2)
g.edges[[0, 1, 2]].data['w'] = th.zeros(3, 2)
g.edges[th.tensor([0, 1, 2])].data['w'] = th.zeros(3, 2)

# one can also access the edges by giving their endpoints
g.edges[1, 0].data['w'] = th.ones(1, 2)                  # edge 1 -> 0
g.edges[[1, 2, 3], [0, 0, 0]].data['w'] = th.ones(3, 2)  # edges [1, 2, 3] -> 0

###############################################################################
# After assignment, each node/edge field is associated with a scheme
# containing the shape and data type (dtype) of its field value.

print(g.node_attr_schemes())
g.ndata['x'] = th.zeros((10, 4))
print(g.node_attr_schemes())

###############################################################################
# One can also remove node/edge states from the graph. This is particularly
# useful to save memory during inference.

g.ndata.pop('x')
g.edata.pop('w')
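# As a quick check (a hedged addition, not in the original tutorial), the
# node scheme dictionary should now be empty:
print(g.node_attr_schemes())  # expect an empty mapping after the pops above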
###############################################################################
# Multigraphs
# ~~~~~~~~~~~
# Many graph applications need multi-edges. To enable this, construct :class:`DGLGraph`
# with ``multigraph=True``.

g_multi = dgl.DGLGraph(multigraph=True)
g_multi.add_nodes(10)
g_multi.ndata['x'] = th.randn(10, 2)

g_multi.add_edges(list(range(1, 10)), 0)
g_multi.add_edge(1, 0)  # two edges on 1->0

g_multi.edata['w'] = th.randn(10, 2)
g_multi.edges[1].data['w'] = th.zeros(1, 2)
print(g_multi.edges())

###############################################################################
# An edge in a multigraph cannot be uniquely identified by its incident nodes
# :math:`u` and :math:`v`; to query their edge IDs, use the ``edge_id`` interface.

eid_10 = g_multi.edge_id(1, 0)
g_multi.edges[eid_10].data['w'] = th.ones(len(eid_10), 2)
print(g_multi.edata['w'])
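# As a quick check (a hedged addition, not in the original tutorial): the ID
# query above returns *all* edges between the pair, so both parallel 1->0
# edges were updated at once.
print(eid_10)  # expect two edge IDs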
###############################################################################
# .. note::
#
#    * Nodes and edges can be added but not removed; we will support removal in
#      the future.
#    * Updating a feature of a different scheme raises an error on an individual
#      node (or a node subset).

###############################################################################
# Next steps
# ----------
# In the :doc:`next tutorial <3_pagerank>`, we will go through the
# DGL message passing interface by implementing PageRank.
"""
.. currentmodule:: dgl

PageRank with DGL Message Passing
=================================

**Author**: `Minjie Wang <https://jermainewang.github.io/>`_, Quan Gan, Yu Gai,
Zheng Zhang

In this section we illustrate the usage of different levels of the DGL message
passing API with PageRank on a small graph. In DGL, the message passing and
feature transformations are all **User-Defined Functions** (UDFs).

The goal of this tutorial: to implement PageRank using the DGL message passing
interface.
"""
###############################################################################
# The PageRank Algorithm
# ----------------------
# In each iteration of PageRank, every node (web page) first scatters its
# PageRank value uniformly to its downstream nodes. The new PageRank value of
# each node is computed by aggregating the received PageRank values from its
# neighbors, which is then adjusted by the damping factor:
#
# .. math::
#
#    PV(u) = \frac{1-d}{N} + d \times \sum_{v \in \mathcal{N}(u)}
#    \frac{PV(v)}{D(v)}
#
# where :math:`N` is the number of nodes in the graph; :math:`D(v)` is the
# out-degree of a node :math:`v`; and :math:`\mathcal{N}(u)` is the set of
# neighbor nodes.
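#
# To make the update concrete, here is a minimal, framework-free sketch
# (a hypothetical illustration, not part of the original tutorial) that
# applies one PageRank update to a made-up three-node directed graph:

# Toy graph (hypothetical): edges 0->1, 0->2, 1->2, 2->0.
toy_edges = [(0, 1), (0, 2), (1, 2), (2, 0)]
n_toy, d_toy = 3, 0.85
toy_out_deg = [sum(1 for s, _ in toy_edges if s == u) for u in range(n_toy)]
pv_toy = [1.0 / n_toy] * n_toy
# PV(u) = (1 - d) / N + d * sum over in-neighbors v of PV(v) / D(v)
pv_toy = [
    (1 - d_toy) / n_toy
    + d_toy * sum(pv_toy[s] / toy_out_deg[s] for s, t in toy_edges if t == u)
    for u in range(n_toy)
]
print(pv_toy)  # the values still sum to 1.0 after the update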
###############################################################################
# A naive implementation
# ----------------------
# Let us first create a graph with 100 nodes using NetworkX and convert it to a
# :class:`DGLGraph`:

import networkx as nx
import matplotlib.pyplot as plt
import torch
import dgl

N = 100     # number of nodes
DAMP = 0.85 # damping factor
K = 10      # number of iterations
g = nx.erdos_renyi_graph(N, 0.1)
g = dgl.DGLGraph(g)
nx.draw(g.to_networkx(), node_size=50, node_color=[[.5, .5, .5,]])
plt.show()
###############################################################################
# According to the algorithm, PageRank consists of two phases in a typical
# scatter-gather pattern. We first initialize the PageRank value of each node
# to :math:`\frac{1}{N}` and store each node's out-degree as a node feature:

g.ndata['pv'] = torch.ones(N) / N
g.ndata['deg'] = g.out_degrees(g.nodes()).float()

###############################################################################
# We then define the message function, which divides every node's PageRank
# value by its out-degree and passes the result as a message to its neighbors:

def pagerank_message_func(edges):
    return {'pv' : edges.src['pv'] / edges.src['deg']}

###############################################################################
# In DGL, the message functions are expressed as **Edge UDFs**. Edge UDFs
# take in a single argument ``edges``. It has three members ``src``, ``dst``,
# and ``data`` for accessing source node features, destination node features,
# and edge features, respectively. Here, the function computes messages only
# from source node features.
#
# Next, we define the reduce function, which removes and aggregates the
# messages from its ``mailbox``, and computes the new PageRank value:

def pagerank_reduce_func(nodes):
    msgs = torch.sum(nodes.mailbox['pv'], dim=1)
    pv = (1 - DAMP) / N + DAMP * msgs
    return {'pv' : pv}

###############################################################################
# The reduce functions are **Node UDFs**. Node UDFs have a single argument
# ``nodes``, which has two members ``data`` and ``mailbox``. ``data``
# contains the node features while ``mailbox`` contains all incoming message
# features, stacked along the second dimension (hence the ``dim=1`` argument).
#
# The message UDF works on a batch of edges, whereas the reduce UDF consumes
# the messages of a batch of edges but outputs a batch of nodes. Their
# relationships are as follows:
#
# .. image:: https://i.imgur.com/kIMiuFb.png
#
# We register the message function and reduce function, which will be called
# later by DGL.

g.register_message_func(pagerank_message_func)
g.register_reduce_func(pagerank_reduce_func)

###############################################################################
# The algorithm is then very straightforward. Here is the code for one
# PageRank iteration:

def pagerank_naive(g):
    # Phase #1: send out messages along all edges.
    for u, v in zip(*g.edges()):
        g.send((u, v))
    # Phase #2: receive messages to compute new PageRank values.
    for v in g.nodes():
        g.recv(v)

###############################################################################
# Improvement with batching semantics
# -----------------------------------
# The above code does not scale to large graphs because it iterates over all
# the nodes. DGL solves this by letting users compute on a *batch* of nodes or
# edges. For example, the following code triggers the message and reduce
# functions on multiple nodes and edges at once.

def pagerank_batch(g):
    g.send(g.edges())
    g.recv(g.nodes())

###############################################################################
# Note that we are still using the same reduce function ``pagerank_reduce_func``,
# where ``nodes.mailbox['pv']`` is a *single* tensor, stacking the incoming
# messages along the second dimension.
#
# Naturally, one will wonder whether it is even possible to perform reduce on
# all nodes in parallel, since each node may have a different number of incoming
# messages and one cannot really "stack" tensors of different lengths together.
# In general, DGL solves the problem by grouping the nodes by the number of
# incoming messages, and calling the reduce function for each group.
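#
# The following is a hedged sketch of this "group by in-degree" idea (often
# called degree bucketing). It illustrates the concept on made-up mailboxes
# only; it is not DGL's internal implementation.

import torch

# hypothetical mailboxes: node id -> list of incoming message tensors
mailboxes = {0: [torch.ones(2)] * 3, 1: [torch.ones(2)] * 1, 2: [torch.ones(2)] * 3}

# group nodes by their number of incoming messages
buckets = {}
for node, msgs in mailboxes.items():
    buckets.setdefault(len(msgs), []).append(node)

# one batched reduce call per bucket: stack to (num_nodes, num_msgs, feat_dim)
for degree, bucket_nodes in buckets.items():
    stacked = torch.stack([torch.stack(mailboxes[n]) for n in bucket_nodes])
    summed = stacked.sum(dim=1)  # reduce over the message dimension
    print(degree, bucket_nodes, summed.shape)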
###############################################################################
# More improvement with higher level APIs
# ---------------------------------------
# DGL provides many routines that combine basic ``send`` and ``recv`` in
# various ways. They are called **level-2 APIs**. For example, the PageRank
# example can be further simplified as follows:

def pagerank_level2(g):
    g.update_all()

###############################################################################
# Besides ``update_all``, we also have ``pull``, ``push``, and ``send_and_recv``
# in this level-2 category. Please refer to the :doc:`API reference <../../api/python/graph>`
# for more details.

###############################################################################
# Even more improvement with DGL builtin functions
# ------------------------------------------------
# As some of the message and reduce functions are very commonly used, DGL also
# provides **builtin functions**. For example, two builtin functions can be
# used in the PageRank example.
#
# * :func:`dgl.function.copy_src(src, out) <function.copy_src>`
#   is an edge UDF that computes the
#   output using the source node feature data. One needs to specify the name of
#   the source feature data (``src``) and the output name (``out``).
#
# * :func:`dgl.function.sum(msg, out) <function.sum>` is a node UDF
#   that sums the messages in
#   the node's mailbox. One needs to specify the message name (``msg``) and the
#   output name (``out``).
#
# For example, the PageRank example can be rewritten as follows:

import dgl.function as fn

def pagerank_builtin(g):
    g.ndata['pv'] = g.ndata['pv'] / g.ndata['deg']
    g.update_all(message_func=fn.copy_src(src='pv', out='m'),
                 reduce_func=fn.sum(msg='m', out='m_sum'))
    g.ndata['pv'] = (1 - DAMP) / N + DAMP * g.ndata['m_sum']

###############################################################################
# Here, we directly provide the UDFs to :func:`update_all <DGLGraph.update_all>`
# as its arguments.
# This will override the previously registered UDFs.
#
# In addition to cleaner code, using builtin functions also gives DGL the
# opportunity to fuse operations together, resulting in faster execution. For
# example, DGL will fuse the ``copy_src`` message function and the ``sum`` reduce
# function into one sparse matrix-vector multiplication (spMV).
#
# `This section <spmv_>`_ describes why spMV can speed up the scatter-gather
# phase in PageRank. For more details about the builtin functions in DGL,
# please read the :doc:`API reference <../../api/python/function>`.
#
# You can also download and run the code to see the difference.

for k in range(K):
    # Uncomment the corresponding line to select a different version.
    # pagerank_naive(g)
    # pagerank_batch(g)
    # pagerank_level2(g)
    pagerank_builtin(g)
print(g.ndata['pv'])
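# A hedged sanity check (not part of the original tutorial): with no dangling
# nodes (true for this random graph with high probability), the PageRank
# vector remains a probability distribution, and we can inspect the top pages.
print(torch.sum(g.ndata['pv']))        # expected to stay close to 1.0
print(torch.topk(g.ndata['pv'], k=5))  # the five highest-ranked nodes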
###############################################################################
# .. _spmv:
#
# Using spMV for PageRank
# -----------------------
# Using builtin functions allows DGL to understand the semantics of UDFs and
# thus allows a more efficient implementation for you. For example, in the case
# of PageRank, one common trick to accelerate it is to use its linear algebra
# form:
#
# .. math::
#
#    \mathbf{R}^{k} = \frac{1-d}{N} \mathbf{1} + d \mathbf{A} \mathbf{R}^{k-1}
#
# Here, :math:`\mathbf{R}^k` is the vector of the PageRank values of all nodes
# at iteration :math:`k`, and :math:`\mathbf{A}` is the sparse adjacency matrix
# of the graph.
# Computing this equation is quite efficient because there exist efficient
# GPU kernels for *sparse matrix-vector multiplication* (spMV). DGL
# detects whether such an optimization is available through the builtin
# functions. If a certain combination of builtins can be mapped to an spMV
# kernel (e.g. the PageRank example), DGL will use it automatically. As a
# result, *we recommend using builtin functions whenever possible*.
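#
# To make the connection concrete, here is a hedged scipy sketch of the
# linear-algebra form above. It assumes the adjacency matrix is normalized
# by out-degree (a column-stochastic transition matrix), which mirrors what
# the fused ``copy_src``/``sum`` pair computes; it is not DGL's actual kernel.

import numpy as np
import scipy.sparse as sp

src, dst = g.edges()
src, dst = src.numpy(), dst.numpy()
deg = g.out_degrees(g.nodes()).numpy()
# entry (v, u) holds 1 / D(u) for every edge u -> v
A_hat = sp.csr_matrix((1.0 / deg[src], (dst, src)), shape=(N, N))

r = np.full(N, 1.0 / N)
for _ in range(K):
    r = (1 - DAMP) / N + DAMP * (A_hat @ r)
print(r[:5])  # should roughly match g.ndata['pv'][:5]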
###############################################################################
# Next steps
# ----------
# Check out :doc:`GCN <../models/1_gnn/1_gcn>` and :doc:`Capsule <../models/4_old_wines/2_capsule>`
# for more model implementations in DGL.
Basic Tutorials
===============

These tutorials cover the basics of DGL.
"""
.. _model-gcn:

Graph Convolutional Network
====================================

**Author:** `Qi Huang <https://github.com/HQ01>`_, `Minjie Wang <https://jermainewang.github.io/>`_,
Yu Gai, Quan Gan, Zheng Zhang

This is a gentle introduction to using DGL to implement Graph Convolutional
Networks (Kipf & Welling, `Semi-Supervised Classification with Graph
Convolutional Networks <https://arxiv.org/pdf/1609.02907.pdf>`_). We build upon
the :doc:`earlier tutorial <../../basics/3_pagerank>` on DGLGraph and demonstrate
how DGL combines graphs with deep neural networks to learn structural representations.
"""
###############################################################################
# Model Overview
# ------------------------------------------
# GCN from the perspective of message passing
# ```````````````````````````````````````````````
# We describe a layer of a graph convolutional neural network from a message
# passing perspective; the math can be found `here <math_>`_.
# It boils down to the following steps, for each node :math:`u`:
#
# 1) Aggregate the neighbors' representations :math:`h_{v}` to produce an
#    intermediate representation :math:`\hat{h}_u`.
#
# 2) Transform the aggregated representation :math:`\hat{h}_{u}` with a linear
#    projection followed by a non-linearity: :math:`h_{u} = f(W_{u} \hat{h}_u)`.
#
# We will implement step 1 with DGL message passing, and step 2 with the
# ``apply_nodes`` method, whose node UDF will be a PyTorch ``nn.Module``.
#
# GCN implementation with DGL
# ``````````````````````````````````````````
# We first define the message and reduce functions as usual. Since the
# aggregation on a node :math:`u` only involves summing over the neighbors'
# representations :math:`h_v`, we can simply use builtin functions:
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

gcn_msg = fn.copy_src(src='h', out='m')
gcn_reduce = fn.sum(msg='m', out='h')

###############################################################################
# We then define the node UDF for ``apply_nodes``, which is a fully-connected layer:

class NodeApplyModule(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(NodeApplyModule, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)
        self.activation = activation

    def forward(self, node):
        h = self.linear(node.data['h'])
        h = self.activation(h)
        return {'h' : h}

###############################################################################
# We then proceed to define the GCN module. A GCN layer essentially performs
# message passing on all the nodes and then applies the ``NodeApplyModule``. Note
# that we omit the dropout in the paper for simplicity.

class GCN(nn.Module):
    def __init__(self, in_feats, out_feats, activation):
        super(GCN, self).__init__()
        self.apply_mod = NodeApplyModule(in_feats, out_feats, activation)

    def forward(self, g, feature):
        g.ndata['h'] = feature
        g.update_all(gcn_msg, gcn_reduce)
        g.apply_nodes(func=self.apply_mod)
        return g.ndata.pop('h')

###############################################################################
# The forward function is essentially the same as that of any other commonly
# seen NN model in PyTorch. We can initialize GCN like any ``nn.Module``. For
# example, let's define a simple neural network consisting of two GCN layers.
# Suppose we are training the classifier for the cora dataset (the input
# feature size is 1433 and the number of classes is 7).

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.gcn1 = GCN(1433, 16, F.relu)
        self.gcn2 = GCN(16, 7, F.relu)

    def forward(self, g, features):
        x = self.gcn1(g, features)
        x = self.gcn2(g, x)
        return x
net = Net()
print(net)

###############################################################################
# We load the cora dataset using DGL's built-in data module.

from dgl.data import citation_graph as citegrh

def load_cora_data():
    data = citegrh.load_cora()
    features = th.FloatTensor(data.features)
    labels = th.LongTensor(data.labels)
    mask = th.ByteTensor(data.train_mask)
    g = DGLGraph(data.graph)
    return g, features, labels, mask

###############################################################################
# We then train the network as follows:

import time
import numpy as np

g, features, labels, mask = load_cora_data()
optimizer = th.optim.Adam(net.parameters(), lr=1e-3)
dur = []
for epoch in range(30):
    if epoch >= 3:
        t0 = time.time()

    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[mask], labels[mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), np.mean(dur)))
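# A hedged evaluation sketch (not part of the original tutorial), assuming the
# cora data object also exposes a ``test_mask`` field alongside ``train_mask``.
def evaluate(net, g, features, labels, mask):
    net.eval()
    with th.no_grad():
        logits = net(g, features)
        _, predicted = th.max(logits[mask], dim=1)
        correct = (predicted == labels[mask]).sum().item()
        return correct / mask.sum().item()

data = citegrh.load_cora()
test_mask = th.ByteTensor(data.test_mask)
print('Test accuracy:', evaluate(net, g, features, labels, test_mask))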
###############################################################################
# .. _math:
#
# GCN in one formula
# ------------------
# Mathematically, the GCN model follows this formula:
#
# :math:`H^{(l+1)} = \sigma(\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}H^{(l)}W^{(l)})`
#
# Here, :math:`H^{(l)}` denotes the :math:`l^{th}` layer in the network,
# :math:`\sigma` is the non-linearity, and :math:`W` is the weight matrix for
# this layer. :math:`D` and :math:`A`, as commonly seen, represent the degree
# matrix and adjacency matrix, respectively. The tilde denotes a renormalization
# trick in which we add a self-connection to each node of the graph, and build
# the corresponding degree and adjacency matrices. The shape of the input
# :math:`H^{(0)}` is :math:`N \times D`, where :math:`N` is the number of nodes
# and :math:`D` is the number of input features. We can chain up multiple
# layers as such to produce a node-level representation output with shape
# :math:`N \times F`, where :math:`F` is the dimension of the output node
# feature vector.
#
# The equation can be efficiently implemented using sparse matrix
# multiplication kernels (such as in Kipf's
# `pygcn <https://github.com/tkipf/pygcn>`_ code). The above DGL implementation
# in fact already uses this trick due to the use of builtin functions. To
# understand what is under the hood, please read our tutorial on :doc:`PageRank <../../basics/3_pagerank>`.
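#
# To make the renormalization trick concrete, here is a hedged numpy sketch on
# a made-up three-node toy graph (an illustration only, separate from the
# model code above):

import numpy as np

A = np.array([[0., 1., 0.],
              [1., 0., 1.],
              [0., 1., 0.]])             # toy adjacency matrix (a 3-node path)
A_tilde = A + np.eye(3)                  # add a self-connection to each node
d_tilde = A_tilde.sum(axis=1)            # degrees of the renormalized graph
D_inv_sqrt = np.diag(1.0 / np.sqrt(d_tilde))
P = D_inv_sqrt @ A_tilde @ D_inv_sqrt    # D^{-1/2} A D^{-1/2} (with tildes)

H0 = np.random.randn(3, 4)               # input features, shape N x D
W0 = np.random.randn(4, 2)               # layer weights, shape D x F
H1 = np.maximum(P @ H0 @ W0, 0.0)        # one GCN layer with a ReLU
print(H1.shape)                          # (3, 2), i.e. N x F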
.. _tutorials1-index:

Graph Neural Network and its variants
--------------------------------------
* **GCN** `[paper] <https://arxiv.org/abs/1609.02907>`__ `[tutorial] <models/1_gcn.html>`__
  `[code] <https://github.com/jermainewang/dgl/blob/master/examples/pytorch/gcn/gcn.py>`__:
  this is the vanilla GCN. The tutorial covers the basic uses of DGL APIs.
* **GAT** `[paper] <https://arxiv.org/abs/1710.10903>`__
  `[code] <https://github.com/jermainewang/dgl/blob/master/examples/pytorch/gat/gat.py>`__:
  the key extension of GAT over the vanilla GCN is deploying multi-head attention
  over the neighborhood of a node, which greatly enhances the capacity and
  expressiveness of the model.
* **R-GCN** `[paper] <https://arxiv.org/abs/1703.06103>`__ `[tutorial] <models/4_rgcn.html>`__
  [code (wip)]: the key
  difference of R-GCN is that it allows multi-edges between two entities of a
  graph, and edges with distinct relationships are encoded differently. This is
  an interesting extension of GCN that can have a lot of applications of its own.
* **LGNN** `[paper] <https://arxiv.org/abs/1705.08415>`__ `[tutorial (wip)]` `[code (wip)]`:
  this model focuses on community detection by inspecting graph structures. It
  uses representations of both the original graph and its line-graph companion. In
  addition to demonstrating how an algorithm can harness multiple graphs, our
  implementation shows how one can judiciously mix vanilla tensor operations,
  sparse-matrix tensor operations, and message-passing with DGL.
* **SSE** `[paper] <http://proceedings.mlr.press/v80/dai18a/dai18a.pdf>`__ `[tutorial (wip)]`
  `[code] <https://github.com/jermainewang/dgl/blob/master/examples/mxnet/sse/sse_batch.py>`__:
  the emphasis here is on *giant* graphs that cannot fit comfortably on one GPU
  card. SSE is an example that illustrates the co-design of both algorithm and
  system: sampling to guarantee asymptotic convergence while lowering the
  complexity, and batching across samples for maximum parallelism.
.. _tutorials2-index:

Dealing with many small graphs
------------------------------
* **Tree-LSTM** `[paper] <https://arxiv.org/abs/1503.00075>`__ `[tutorial] <models/3_tree-lstm.html>`__
  `[code] <https://github.com/jermainewang/dgl/blob/master/examples/pytorch/tree_lstm/tree_lstm.py>`__:
  sentences in natural languages have inherent structures, which are thrown away
  by treating them simply as sequences. Tree-LSTM is a powerful model that learns
  the representation by leveraging prior syntactic structures (e.g. parse trees).
  The challenge in training it well is that simply padding a sentence to the
  maximum length no longer works, since trees of different sentences have
  different sizes and topologies. DGL solves this problem by throwing the trees
  into a bigger "container" graph, and using message-passing to explore maximum
  parallelism. The key API we use is batching.

.. _tutorials3-index:

Generative models
------------------------------
* **DGMG** `[paper] <https://arxiv.org/abs/1803.03324>`__ `[tutorial] <models/5_dgmg.html>`__
  `[code] <https://github.com/jermainewang/dgl/tree/master/examples/pytorch/dgmg>`__:
  this model belongs to the important family that deals with structural
  generation. DGMG is interesting because its state-machine approach is the most
  general. It is also very challenging because, unlike Tree-LSTM, every sample
  has a dynamic, probability-driven structure that is not available before
  training. We are able to progressively leverage intra- and inter-graph
  parallelism to steadily improve the performance.
* **JTNN** `[paper] <https://arxiv.org/abs/1802.04364>`__ `[code (wip)]`: unlike DGMG, this
  paper generates molecular graphs using the framework of a variational
  auto-encoder. Perhaps more interesting is its approach to building structures
  hierarchically, in the case of molecules, with a junction tree as the middle
  scaffolding.

.. _tutorials4-index:

Old (new) wines in new bottles
------------------------------
* **Capsule** `[paper] <https://arxiv.org/abs/1710.09829>`__ `[tutorial] <models/2_capsule.html>`__
  `[code] <https://github.com/jermainewang/dgl/tree/master/examples/pytorch/capsule>`__: this new
  computer vision model has two key ideas -- enhancing the feature representation
  in a vector form (instead of a scalar) called a *capsule*, and replacing
  max-pooling with dynamic routing. The idea of dynamic routing is to integrate a
  lower-level capsule into one (or several) higher-level ones with
  non-parametric message-passing. We show how the latter can be nicely implemented
  with DGL APIs.
* **Transformer** `[paper] <https://arxiv.org/abs/1706.03762>`__ `[tutorial (wip)]` `[code (wip)]` and
  **Universal Transformer** `[paper] <https://arxiv.org/abs/1807.03819>`__ `[tutorial (wip)]`
  `[code (wip)]`: these
  two models replace RNNs with several layers of multi-head attention to encode
  and discover structures among the tokens of a sentence. These attention
  mechanisms can similarly be formulated as graph operations with message-passing.