Unverified commit dce89919, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] Auto-reformat multiple python folders. (#5325)



* auto-reformat

* lintrunner

---------
Co-authored-by: Ubuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
parent ab812179
from ruamel.yaml.comments import CommentedMap
@@ -14,12 +13,14 @@ def deep_convert_dict(layer):
    return to_ret


import collections.abc


def merge_comment(d, comment_dict, column=30):
    for k, v in comment_dict.items():
        if isinstance(v, collections.abc.Mapping):
            d[k] = merge_comment(d.get(k, CommentedMap()), v)
        else:
            d.yaml_add_eol_comment(v, key=k, column=column)
    return d
\ No newline at end of file
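The hunk above only reformats the merge_comment helper; as a hedged usage sketch (the config keys and comment strings below are illustrative, not from the commit), it can be applied to a ruamel.yaml CommentedMap like this:

import sys

from ruamel.yaml import YAML
from ruamel.yaml.comments import CommentedMap

# Illustrative config: attach an end-of-line comment to each key, then dump it.
cfg = CommentedMap(lr=0.01, epochs=200)
comments = {"lr": "learning rate", "epochs": "number of training epochs"}
merge_comment(cfg, comments)  # comments start at column 30 by default

YAML().dump(cfg, sys.stdout)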
#!/usr/bin/env python
from distutils.core import setup
from setuptools import find_packages

setup(
    name="dglgo",
    version="0.0.2",
    description="DGL",
    author="DGL Team",
    author_email="wmjlyjemaine@gmail.com",
    packages=find_packages(),
    install_requires=[
        "typer>=0.4.0",
        "isort>=5.10.1",
        "autopep8>=1.6.0",
        "numpydoc>=1.1.0",
        "pydantic>=1.9.0",
        "ruamel.yaml>=0.17.20",
        "PyYAML>=5.1",
        "ogb>=1.3.3",
        "rdkit-pypi",
        "scikit-learn>=0.20.0",
    ],
    package_data={"": ["./*"]},
    include_package_data=True,
    license="APACHE",
    entry_points={"console_scripts": ["dgl = dglgo.cli.cli:main"]},
    url="https://github.com/dmlc/dgl",
)
@@ -14,16 +14,18 @@
#
import os
import sys

sys.path.insert(0, os.path.abspath("../../python"))

# -- Project information -----------------------------------------------------

project = "DGL"
copyright = "2018, DGL Team"
author = "DGL Team"

import dgl

version = dgl.__version__
release = dgl.__version__

dglbackend = os.environ.get("DGLBACKEND", "pytorch")
@@ -39,35 +41,35 @@ dglbackend = os.environ.get("DGLBACKEND", "pytorch")
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.coverage",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
    "sphinx.ext.intersphinx",
    "sphinx.ext.graphviz",
    "sphinxemoji.sphinxemoji",
    "sphinx_gallery.gen_gallery",
    "sphinx_copybutton",
    "nbsphinx",
    "nbsphinx_link",
]

# Do not run notebooks on non-pytorch backends
if dglbackend != "pytorch":
    nbsphinx_execute = "never"

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]

# The suffix(es) of source filenames.
# You can specify multiple suffix as a list of string:
#
source_suffix = [".rst", ".md"]

# The master toctree document.
master_doc = "index"

# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
@@ -90,7 +92,7 @@ pygments_style = None
# The theme to use for HTML and HTML Help pages.  See the documentation for
# a list of builtin themes.
#
html_theme = "sphinx_rtd_theme"

# Theme options are theme-specific and customize the look and feel of a theme
# further.  For a list of options available for each theme, see the
@@ -101,8 +103,8 @@ html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ["_static"]
html_css_files = ["css/custom.css"]

# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
@@ -118,7 +120,7 @@ html_css_files = ['css/custom.css']
# -- Options for HTMLHelp output ---------------------------------------------

# Output file base name for HTML help builder.
htmlhelp_basename = "dgldoc"

# -- Options for LaTeX output ------------------------------------------------
@@ -127,15 +129,12 @@ latex_elements = {
    # The paper size ('letterpaper' or 'a4paper').
    #
    # 'papersize': 'letterpaper',
    # The font size ('10pt', '11pt' or '12pt').
    #
    # 'pointsize': '10pt',
    # Additional stuff for the LaTeX preamble.
    #
    # 'preamble': '',
    # Latex figure (float) alignment
    #
    # 'figure_align': 'htbp',
@@ -145,8 +144,7 @@ latex_elements = {
#   (source start file, target name, title,
#    author, documentclass [howto, manual, or own class]).
latex_documents = [
    (master_doc, "dgl.tex", "DGL Documentation", "DGL Team", "manual"),
]
@@ -154,10 +152,7 @@ latex_documents = [
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [(master_doc, "dgl", "DGL Documentation", [author], 1)]

# -- Options for Texinfo output ----------------------------------------------
@@ -166,9 +161,15 @@ man_pages = [
#  (source start file, target name, title, author,
#   dir menu entry, description, category)
texinfo_documents = [
    (
        master_doc,
        "dgl",
        "DGL Documentation",
        author,
        "dgl",
        "Library for deep learning on graphs.",
        "Miscellaneous",
    ),
]
@@ -187,64 +188,71 @@ epub_title = project
# epub_uid = ''

# A list of files that should not be packed into the epub file.
epub_exclude_files = ["search.html"]

# -- Extension configuration -------------------------------------------------
autosummary_generate = True
autodoc_member_order = "alphabetical"

intersphinx_mapping = {
    "python": (
        "https://docs.python.org/{.major}".format(sys.version_info),
        None,
    ),
    "numpy": ("http://docs.scipy.org/doc/numpy/", None),
    "scipy": ("http://docs.scipy.org/doc/scipy/reference", None),
    "matplotlib": ("http://matplotlib.org/", None),
    "networkx": ("https://networkx.github.io/documentation/stable", None),
}

# sphinx gallery configurations
from sphinx_gallery.sorting import FileNameSortKey

examples_dirs = [
    "../../tutorials/blitz",
    "../../tutorials/large",
    "../../tutorials/dist",
    "../../tutorials/models",
    "../../tutorials/multi",
    "../../tutorials/cpu",
]  # path to find sources
gallery_dirs = [
    "tutorials/blitz/",
    "tutorials/large/",
    "tutorials/dist/",
    "tutorials/models/",
    "tutorials/multi/",
    "tutorials/cpu",
]  # path to generate docs

if dglbackend != "pytorch":
    examples_dirs = []
    gallery_dirs = []

reference_url = {
    "dgl": None,
    "numpy": "http://docs.scipy.org/doc/numpy/",
    "scipy": "http://docs.scipy.org/doc/scipy/reference",
    "matplotlib": "http://matplotlib.org/",
    "networkx": "https://networkx.github.io/documentation/stable",
}

sphinx_gallery_conf = {
    "backreferences_dir": "generated/backreferences",
    "doc_module": ("dgl", "numpy"),
    "examples_dirs": examples_dirs,
    "gallery_dirs": gallery_dirs,
    "within_subsection_order": FileNameSortKey,
    "filename_pattern": ".py",
    "download_all_examples": False,
}

# Compatibility for different backend when builds tutorials
if dglbackend == "mxnet":
    sphinx_gallery_conf["filename_pattern"] = "/*(?<=mx)\.py"
if dglbackend == "pytorch":
    sphinx_gallery_conf["filename_pattern"] = "/*(?<!mx)\.py"

# sphinx-copybutton tool
copybutton_prompt_text = r">>> |\.\.\. "
copybutton_prompt_is_regexp = True
import numpy as np
import pandas as pd
from dgl import DGLGraph

# from dgl.data.qm9 import QM9
from dgl.data import CitationGraphDataset, PPIDataset, RedditDataset, TUDataset
from dgl.data.bitcoinotc import BitcoinOTC
from dgl.data.gdelt import GDELT
from dgl.data.gindt import GINDataset
from dgl.data.gnn_benchmark import AmazonCoBuy, Coauthor, CoraFull
from dgl.data.icews18 import ICEWS18
from dgl.data.karate import KarateClub
from dgl.data.qm7b import QM7b
from pytablewriter import MarkdownTableWriter, RstGridTableWriter

ds_list = {
    "BitcoinOTC": "BitcoinOTC()",
@@ -40,9 +41,9 @@ writer = RstGridTableWriter()
# writer = MarkdownTableWriter()

extract_graph = lambda g: g if isinstance(g, DGLGraph) else g[0]

stat_list = []
for k, v in ds_list.items():
    print(k, " ", v)
    ds = eval(v.split("/")[0])
    num_nodes = []
    num_edges = []
@@ -58,10 +59,10 @@ for k,v in ds_list.items():
        "# of graphs": len(ds),
        "Avg. # of nodes": np.mean(num_nodes),
        "Avg. # of edges": np.mean(num_edges),
        "Node field": ", ".join(list(gg.ndata.keys())),
        "Edge field": ", ".join(list(gg.edata.keys())),
        # "Graph field": ', '.join(ds[0][0].gdata.keys()) if hasattr(ds[0][0], "gdata") else "",
        "Temporal": hasattr(ds, "is_temporal"),
    }
    stat_list.append(dd)
...
@@ -26,15 +26,14 @@ def get_sddmm_kernels_gpu(idtypes, dtypes):
    return ret


if __name__ == "__main__":
    binary_path = "libfeatgraph_kernels.so"
    kernels = []
    idtypes = ["int32", "int64"]
    dtypes = ["float16", "float64", "float32", "int32", "int64"]
    kernels += get_sddmm_kernels_gpu(idtypes, dtypes)
    # build kernels and export the module to libfeatgraph_kernels.so
    module = tvm.build(kernels, target="cuda", target_host="llvm")
    module.export_library(binary_path)
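A hedged follow-up sketch (not part of the commit): once the module has been exported, the compiled kernels can be loaded back through the TVM runtime; the kernel name below follows the SDDMMTreeReduction_{idtype}_{dtype} naming used in the lowering code further down.

import tvm

# Load the exported shared library and look up one packed kernel by name.
mod = tvm.runtime.load_module("libfeatgraph_kernels.so")
sddmm_i32_f32 = mod.get_function("SDDMMTreeReduction_int32_float32")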
@@ -4,8 +4,8 @@ from tvm import te
def sddmm_tree_reduction_gpu(idx_type, feat_type):
    """SDDMM kernels on GPU optimized with Tree Reduction.

    Parameters
    ----------
    idx_type : str
@@ -19,35 +19,40 @@ def sddmm_tree_reduction_gpu(idx_type, feat_type):
        The result IRModule.
    """
    # define vars and placeholders
    nnz = te.var("nnz", idx_type)
    num_rows = te.var("num_rows", idx_type)
    num_cols = te.var("num_cols", idx_type)
    H = te.var("num_heads", idx_type)
    D = te.var("feat_len", idx_type)
    row = te.placeholder((nnz,), idx_type, "row")
    col = te.placeholder((nnz,), idx_type, "col")
    ufeat = te.placeholder((num_rows, H, D), feat_type, "ufeat")
    vfeat = te.placeholder((num_cols, H, D), feat_type, "vfeat")

    # define edge computation function
    def edge_func(eid, h, i):
        k = te.reduce_axis((0, D), name="k")
        return te.sum(ufeat[row[eid], h, k] * vfeat[col[eid], h, k], axis=k)

    out = te.compute(
        (nnz, H, tvm.tir.IntImm(idx_type, 1)), edge_func, name="out"
    )
    # define schedules
    sched = te.create_schedule(out.op)
    edge_axis, head_axis, _ = out.op.axis
    reduce_axis = out.op.reduce_axis[0]
    _, red_inner = sched[out].split(reduce_axis, factor=32)
    edge_outer, edge_inner = sched[out].split(edge_axis, factor=32)
    sched[out].bind(red_inner, te.thread_axis("threadIdx.x"))
    sched[out].bind(edge_inner, te.thread_axis("threadIdx.y"))
    sched[out].bind(edge_outer, te.thread_axis("blockIdx.x"))
    sched[out].bind(head_axis, te.thread_axis("blockIdx.y"))
    return tvm.lower(
        sched,
        [row, col, ufeat, vfeat, out],
        name="SDDMMTreeReduction_{}_{}".format(idx_type, feat_type),
    )


if __name__ == "__main__":
    kernel0 = sddmm_tree_reduction_gpu("int32", "float32")
    print(kernel0)
import dgl
import dgl.backend as F
import torch

g = dgl.rand_graph(10, 15).int().to(torch.device(0))
gidx = g._graph
u = torch.rand((10, 2, 8), device=torch.device(0))
v = torch.rand((10, 2, 8), device=torch.device(0))
e = dgl.ops.gsddmm(g, "dot", u, v)
print(e)
e = torch.zeros((15, 2, 1), device=torch.device(0))
u = F.zerocopy_to_dgl_ndarray(u)
v = F.zerocopy_to_dgl_ndarray(v)
e = F.zerocopy_to_dgl_ndarray_for_write(e)
...
@@ -22,13 +22,13 @@ networks with PyTorch.
"""

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.data
import torch
import torch.nn as nn
import torch.nn.functional as F

######################################################################
# Overview of Node Classification with GNN
...
@@ -31,11 +31,11 @@ By the end of this tutorial you will be able to:
#

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import torch

g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)

# Equivalently, PyTorch LongTensors also work.
...
@@ -19,13 +19,13 @@ GNN for node classification <1_introduction>`.
"""

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.function as fn
import torch
import torch.nn as nn
import torch.nn.functional as F

######################################################################
# Message passing and GNNs
...
@@ -19,17 +19,17 @@ By the end of this tutorial you will be able to
import itertools
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.data
import numpy as np
import scipy.sparse as sp
import torch
import torch.nn as nn
import torch.nn.functional as F

######################################################################
# Overview of Link Prediction with GNN
# ------------------------------------
...
@@ -14,13 +14,13 @@ By the end of this tutorial, you will be able to
"""

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.data
import torch
import torch.nn as nn
import torch.nn.functional as F

######################################################################
# Overview of Graph Classification with GNN
@@ -54,6 +54,8 @@ print("Node feature dimensionality:", dataset.dim_nfeats)
print("Number of graph categories:", dataset.gclasses)

from dgl.dataloading import GraphDataLoader

######################################################################
# Defining Data Loader
# --------------------
@@ -74,8 +76,6 @@ print("Number of graph categories:", dataset.gclasses)
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(dataset)
num_train = int(num_examples * 0.8)
...
@@ -88,10 +88,10 @@ interactions.head()
#

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import torch
from dgl.data import DGLDataset
...
@@ -26,10 +26,11 @@ Sampling for GNN Training <L0_neighbor_sampling_overview>`.
#

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import torch
from ogb.nodeproppred import DglNodePropPredDataset

dataset = DglNodePropPredDataset("ogbn-arxiv")
@@ -284,13 +285,14 @@ valid_dataloader = dgl.dataloading.DataLoader(
)

######################################################################
# The following is a training loop that performs validation every epoch.
# It also saves the model with the best validation accuracy into a file.
#
import sklearn.metrics
import tqdm

best_accuracy = 0
best_model_path = "model.pt"
...
@@ -53,10 +53,11 @@ Sampling for Node Classification <L1_large_node_classification>`.
#

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import torch
from ogb.nodeproppred import DglNodePropPredDataset

dataset = DglNodePropPredDataset("ogbn-arxiv")
@@ -339,6 +340,8 @@ predictor = DotPredictor().to(device)
opt = torch.optim.Adam(list(model.parameters()) + list(predictor.parameters()))

import sklearn.metrics

######################################################################
# The following is the training loop for link prediction and
# evaluation, and also saves the model that performs the best on the
@@ -346,7 +349,6 @@ opt = torch.optim.Adam(list(model.parameters()) + list(predictor.parameters()))
#
import tqdm

best_accuracy = 0
best_model_path = "model.pt"
...
@@ -14,30 +14,33 @@ for stochastic GNN training. It assumes that
"""

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import torch
from ogb.nodeproppred import DglNodePropPredDataset

dataset = DglNodePropPredDataset("ogbn-arxiv")
device = "cpu"  # change to 'cuda' for GPU

graph, node_labels = dataset[0]
# Add reverse edges since ogbn-arxiv is unidirectional.
graph = dgl.add_reverse_edges(graph)
graph.ndata["label"] = node_labels[:, 0]

idx_split = dataset.get_idx_split()
train_nids = idx_split["train"]
node_features = graph.ndata["feat"]

sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
train_dataloader = dgl.dataloading.DataLoader(
    graph,
    train_nids,
    sampler,
    batch_size=1024,
    shuffle=True,
    drop_last=False,
    num_workers=0,
)

input_nodes, output_nodes, mfgs = next(iter(train_dataloader))
@@ -75,8 +78,8 @@ print(mfg.num_src_nodes(), mfg.num_dst_nodes())
# will do with ``ndata`` on the graphs you have seen earlier:
#
mfg.srcdata["x"] = torch.zeros(mfg.num_src_nodes(), mfg.num_dst_nodes())
dst_feat = mfg.dstdata["feat"]

######################################################################
@@ -105,7 +108,11 @@ mfg.srcdata[dgl.NID], mfg.dstdata[dgl.NID]
# .. |image1| image:: https://data.dgl.ai/tutorial/img/bipartite.gif
#
print(
    torch.equal(
        mfg.srcdata[dgl.NID][: mfg.num_dst_nodes()], mfg.dstdata[dgl.NID]
    )
)

######################################################################
@@ -113,7 +120,7 @@ print(torch.equal(mfg.srcdata[dgl.NID][:mfg.num_dst_nodes()], mfg.dstdata[dgl.NI
# :math:`h_u^{(l-1)}`:
#
mfg.srcdata["h"] = torch.randn(mfg.num_src_nodes(), 10)

######################################################################
@@ -132,8 +139,8 @@ mfg.srcdata['h'] = torch.randn(mfg.num_src_nodes(), 10)
import dgl.function as fn

mfg.update_all(message_func=fn.copy_u("h", "m"), reduce_func=fn.mean("m", "h"))
m_v = mfg.dstdata["h"]
m_v
@@ -147,6 +154,7 @@ import torch.nn as nn
import torch.nn.functional as F
import tqdm


class SAGEConv(nn.Module):
    """Graph convolution module used by the GraphSAGE model.
@@ -157,6 +165,7 @@ class SAGEConv(nn.Module):
    out_feat : int
        Output feature size.
    """

    def __init__(self, in_feat, out_feat):
        super(SAGEConv, self).__init__()
        # A linear submodule for projecting the input and neighbor feature to the output.
@@ -174,14 +183,15 @@ class SAGEConv(nn.Module):
        """
        with g.local_scope():
            h_src, h_dst = h
            g.srcdata["h"] = h_src  # <---
            g.dstdata["h"] = h_dst  # <---
            # update_all is a message passing API.
            g.update_all(fn.copy_u("h", "m"), fn.mean("m", "h_N"))
            h_N = g.dstdata["h_N"]
            h_total = torch.cat([h_dst, h_N], dim=1)  # <---
            return self.linear(h_total)


class Model(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(Model, self).__init__()
@@ -189,28 +199,31 @@ class Model(nn.Module):
        self.conv2 = SAGEConv(h_feats, num_classes)

    def forward(self, mfgs, x):
        h_dst = x[: mfgs[0].num_dst_nodes()]
        h = self.conv1(mfgs[0], (x, h_dst))
        h = F.relu(h)
        h_dst = h[: mfgs[1].num_dst_nodes()]
        h = self.conv2(mfgs[1], (h, h_dst))
        return h


sampler = dgl.dataloading.MultiLayerNeighborSampler([4, 4])
train_dataloader = dgl.dataloading.DataLoader(
    graph,
    train_nids,
    sampler,
    device=device,
    batch_size=1024,
    shuffle=True,
    drop_last=False,
    num_workers=0,
)
model = Model(graph.ndata["feat"].shape[1], 128, dataset.num_classes).to(device)

with tqdm.tqdm(train_dataloader) as tq:
    for step, (input_nodes, output_nodes, mfgs) in enumerate(tq):
        inputs = mfgs[0].srcdata["feat"]
        labels = mfgs[-1].dstdata["label"]
        predictions = model(mfgs, inputs)
@@ -232,6 +245,7 @@ with tqdm.tqdm(train_dataloader) as tq:
# Say you start with a GNN module that works for full-graph training only:
#


class SAGEConv(nn.Module):
    """Graph convolution module used by the GraphSAGE model.
@@ -242,6 +256,7 @@ class SAGEConv(nn.Module):
    out_feat : int
        Output feature size.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # A linear submodule for projecting the input and neighbor feature to the output.
@@ -258,10 +273,13 @@ class SAGEConv(nn.Module):
            The input node feature.
        """
        with g.local_scope():
            g.ndata["h"] = h
            # update_all is a message passing API.
            g.update_all(
                message_func=fn.copy_u("h", "m"),
                reduce_func=fn.mean("m", "h_N"),
            )
            h_N = g.ndata["h_N"]
            h_total = torch.cat([h, h_N], dim=1)
            return self.linear(h_total)
@@ -352,6 +370,7 @@ class SAGEConv(nn.Module):
# to something like the following:
#


class SAGEConvForBoth(nn.Module):
    """Graph convolution module used by the GraphSAGE model.
@@ -362,6 +381,7 @@ class SAGEConvForBoth(nn.Module):
    out_feat : int
        Output feature size.
    """

    def __init__(self, in_feat, out_feat):
        super().__init__()
        # A linear submodule for projecting the input and neighbor feature to the output.
@@ -383,10 +403,13 @@ class SAGEConvForBoth(nn.Module):
        else:
            h_src = h_dst = h
        g.srcdata["h"] = h_src
        # update_all is a message passing API.
        g.update_all(
            message_func=fn.copy_u("h", "m"),
            reduce_func=fn.mean("m", "h_N"),
        )
        h_N = g.ndata["h_N"]
        h_total = torch.cat([h_dst, h_N], dim=1)
        return self.linear(h_total)
...
@@ -20,189 +20,186 @@ Convolutional Networks <https://arxiv.org/pdf/1609.02907.pdf>`_). We explain
what is under the hood of the :class:`~dgl.nn.GraphConv` module.
The reader is expected to learn how to define a new GNN layer using DGL's
message passing APIs.
"""

###############################################################################
# Model Overview
# ------------------------------------------
# GCN from the perspective of message passing
# ```````````````````````````````````````````````
# We describe a layer of graph convolutional neural network from a message
# passing perspective; the math can be found `here <math_>`_.
# It boils down to the following step, for each node :math:`u`:
#
# 1) Aggregate neighbors' representations :math:`h_{v}` to produce an
# intermediate representation :math:`\hat{h}_u`. 2) Transform the aggregated
# representation :math:`\hat{h}_{u}` with a linear projection followed by a
# non-linearity: :math:`h_{u} = f(W_{u} \hat{h}_u)`.
#
# We will implement step 1 with DGL message passing, and step 2 by
# PyTorch ``nn.Module``.
#
# GCN implementation with DGL
# ``````````````````````````````````````````
# We first define the message and reduce function as usual. Since the
# aggregation on a node :math:`u` only involves summing over the neighbors'
# representations :math:`h_v`, we can simply use builtin functions:

import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

gcn_msg = fn.copy_u(u="h", out="m")
gcn_reduce = fn.sum(msg="m", out="h")

###############################################################################
# We then proceed to define the GCNLayer module. A GCNLayer essentially performs
# message passing on all the nodes then applies a fully-connected layer.
#
# .. note::
#
#    This is showing how to implement a GCN from scratch. DGL provides a more
#    efficient :class:`builtin GCN layer module <dgl.nn.pytorch.conv.GraphConv>`.
#


class GCNLayer(nn.Module):
    def __init__(self, in_feats, out_feats):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        # Creating a local scope so that all the stored ndata and edata
        # (such as the `'h'` ndata below) are automatically popped out
        # when the scope exits.
        with g.local_scope():
            g.ndata["h"] = feature
            g.update_all(gcn_msg, gcn_reduce)
            h = g.ndata["h"]
            return self.linear(h)


###############################################################################
# The forward function is essentially the same as any other commonly seen NNs
# model in PyTorch. We can initialize GCN like any ``nn.Module``. For example,
# let's define a simple neural network consisting of two GCN layers. Suppose we
# are training the classifier for the cora dataset (the input feature size is
# 1433 and the number of classes is 7). The last GCN layer computes node embeddings,
# so the last layer in general does not apply activation.


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.layer1 = GCNLayer(1433, 16)
        self.layer2 = GCNLayer(16, 7)

    def forward(self, g, features):
        x = F.relu(self.layer1(g, features))
        x = self.layer2(g, x)
        return x


net = Net()
print(net)

###############################################################################
# We load the cora dataset using DGL's built-in data module.

from dgl.data import CoraGraphDataset


def load_cora_data():
    dataset = CoraGraphDataset()
    g = dataset[0]
    features = g.ndata["feat"]
    labels = g.ndata["label"]
    train_mask = g.ndata["train_mask"]
    test_mask = g.ndata["test_mask"]
    return g, features, labels, train_mask, test_mask


###############################################################################
# When a model is trained, we can use the following method to evaluate
# the performance of the model on the test dataset:


def evaluate(model, g, features, labels, mask):
    model.eval()
    with th.no_grad():
        logits = model(g, features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = th.max(logits, dim=1)
        correct = th.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


###############################################################################
# We then train the network as follows:

import time

import numpy as np

g, features, labels, train_mask, test_mask = load_cora_data()
# Add edges between each node and itself to preserve old node representations
g.add_edges(g.nodes(), g.nodes())
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)
dur = []
for epoch in range(50):
    if epoch >= 3:
        t0 = time.time()
    net.train()
    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)
    acc = evaluate(net, g, features, labels, test_mask)
    print(
        "Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, np.mean(dur)
        )
    )

###############################################################################
# .. _math:
#
# GCN in one formula
# ------------------
# Mathematically, the GCN model follows this formula:
#
# :math:`H^{(l+1)} = \sigma(\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}H^{(l)}W^{(l)})`
#
# Here, :math:`H^{(l)}` denotes the :math:`l^{th}` layer in the network,
# :math:`\sigma` is the non-linearity, and :math:`W` is the weight matrix for
# this layer. :math:`\tilde{D}` and :math:`\tilde{A}` are separately the degree
# and adjacency matrices for the graph. With the superscript ~, we are referring
# to the variant where we add additional edges between each node and itself to
# preserve its old representation in graph convolutions. The shape of the input
# :math:`H^{(0)}` is :math:`N \times D`, where :math:`N` is the number of nodes
# and :math:`D` is the number of input features. We can chain up multiple
# layers as such to produce a node-level representation output with shape
# :math:`N \times F`, where :math:`F` is the dimension of the output node
# feature vector.
#
# The equation can be efficiently implemented using sparse matrix
# multiplication kernels (such as Kipf's
# `pygcn <https://github.com/tkipf/pygcn>`_ code). The above DGL implementation
# in fact has already used this trick due to the use of builtin functions.
#
# Note that the tutorial code implements a simplified version of GCN where we
# replace :math:`\tilde{D}^{-\frac{1}{2}}\tilde{A}\tilde{D}^{-\frac{1}{2}}` with
# :math:`\tilde{A}`. For a full implementation, see our example
# `here <https://github.com/dmlc/dgl/tree/master/examples/pytorch/gcn>`_.
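As a hedged illustration of the formula above (editorial sketch, not part of the commit), the propagation rule can also be written directly with dense tensor operations; A, X, and W below are assumed inputs rather than names from the tutorial:

import torch as th

def gcn_layer(A, X, W):
    # H^{l+1} = sigma(D~^{-1/2} A~ D~^{-1/2} H^l W^l) on a dense adjacency matrix.
    A_tilde = A + th.eye(A.shape[0])           # add self-loop edges
    deg = A_tilde.sum(dim=1)                   # degrees of A~
    D_inv_sqrt = th.diag(deg.pow(-0.5))        # D~^{-1/2}
    A_hat = D_inv_sqrt @ A_tilde @ D_inv_sqrt  # normalized adjacency
    return th.relu(A_hat @ X @ W)              # one GCN layer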
...@@ -29,340 +29,389 @@ subject, relation, object. Edges thus encode important information and ...@@ -29,340 +29,389 @@ subject, relation, object. Edges thus encode important information and
have their own embeddings to be learned. Furthermore, there may exist have their own embeddings to be learned. Furthermore, there may exist
multiple edges among any given pair. multiple edges among any given pair.
""" """
############################################################################### ###############################################################################
# A brief introduction to R-GCN # A brief introduction to R-GCN
# --------------------------- # ---------------------------
# In *statistical relational learning* (SRL), there are two fundamental # In *statistical relational learning* (SRL), there are two fundamental
# tasks: # tasks:
# #
# - **Entity classification** - Where you assign types and categorical # - **Entity classification** - Where you assign types and categorical
# properties to entities. # properties to entities.
# - **Link prediction** - Where you recover missing triples. # - **Link prediction** - Where you recover missing triples.
# #
# In both cases, missing information is expected to be recovered from the # In both cases, missing information is expected to be recovered from the
# neighborhood structure of the graph. For example, the R-GCN # neighborhood structure of the graph. For example, the R-GCN
# paper cited earlier provides the following example. Knowing that Mikhail Baryshnikov was educated at the Vaganova Academy # paper cited earlier provides the following example. Knowing that Mikhail Baryshnikov was educated at the Vaganova Academy
# implies both that Mikhail Baryshnikov should have the label person, and # implies both that Mikhail Baryshnikov should have the label person, and
# that the triple (Mikhail Baryshnikov, lived in, Russia) must belong to the # that the triple (Mikhail Baryshnikov, lived in, Russia) must belong to the
# knowledge graph. # knowledge graph.
# #
# R-GCN solves these two problems using a common graph convolutional network. It's # R-GCN solves these two problems using a common graph convolutional network. It's
# extended with multi-edge encoding to compute embedding of the entities, but # extended with multi-edge encoding to compute embedding of the entities, but
# with different downstream processing. # with different downstream processing.
# #
# - Entity classification is done by attaching a softmax classifier at the # - Entity classification is done by attaching a softmax classifier at the
# final embedding of an entity (node). Training is through loss of standard # final embedding of an entity (node). Training is through loss of standard
# cross-entropy. # cross-entropy.
# - Link prediction is done by reconstructing an edge with an autoencoder # - Link prediction is done by reconstructing an edge with an autoencoder
# architecture, using a parameterized score function. Training uses negative # architecture, using a parameterized score function. Training uses negative
# sampling. # sampling.
# #
# This tutorial focuses on the first task, entity classification, to show how to generate entity # This tutorial focuses on the first task, entity classification, to show how to generate entity
# representation. `Complete # representation. `Complete
# code <https://github.com/dmlc/dgl/tree/master/examples/pytorch/rgcn>`_ # code <https://github.com/dmlc/dgl/tree/master/examples/pytorch/rgcn>`_
# for both tasks is found in the DGL Github repository. # for both tasks is found in the DGL Github repository.
# #
# Key ideas of R-GCN # Key ideas of R-GCN
# ------------------- # -------------------
# Recall that in GCN, the hidden representation for each node :math:`i` at # Recall that in GCN, the hidden representation for each node :math:`i` at
# :math:`(l+1)^{th}` layer is computed by: # :math:`(l+1)^{th}` layer is computed by:
# #
# .. math:: h_i^{l+1} = \sigma\left(\sum_{j\in N_i}\frac{1}{c_i} W^{(l)} h_j^{(l)}\right)~~~~~~~~~~(1)\\ # .. math:: h_i^{l+1} = \sigma\left(\sum_{j\in N_i}\frac{1}{c_i} W^{(l)} h_j^{(l)}\right)~~~~~~~~~~(1)\\
# #
# where :math:`c_i` is a normalization constant. # where :math:`c_i` is a normalization constant.
# #
# The key difference between R-GCN and GCN is that in R-GCN, edges can # The key difference between R-GCN and GCN is that in R-GCN, edges can
# represent different relations. In GCN, weight :math:`W^{(l)}` in equation # represent different relations. In GCN, weight :math:`W^{(l)}` in equation
# :math:`(1)` is shared by all edges in layer :math:`l`. In contrast, in # :math:`(1)` is shared by all edges in layer :math:`l`. In contrast, in
# R-GCN, different edge types use different weights and only edges of the # R-GCN, different edge types use different weights and only edges of the
# same relation type :math:`r` are associated with the same projection weight # same relation type :math:`r` are associated with the same projection weight
# :math:`W_r^{(l)}`. # :math:`W_r^{(l)}`.
# #
# So the hidden representation of entities in :math:`(l+1)^{th}` layer in # So the hidden representation of entities in :math:`(l+1)^{th}` layer in
# R-GCN can be formulated as the following equation: # R-GCN can be formulated as the following equation:
# #
# .. math:: h_i^{l+1} = \sigma\left(W_0^{(l)}h_i^{(l)}+\sum_{r\in R}\sum_{j\in N_i^r}\frac{1}{c_{i,r}}W_r^{(l)}h_j^{(l)}\right)~~~~~~~~~~(2)\\ # .. math:: h_i^{l+1} = \sigma\left(W_0^{(l)}h_i^{(l)}+\sum_{r\in R}\sum_{j\in N_i^r}\frac{1}{c_{i,r}}W_r^{(l)}h_j^{(l)}\right)~~~~~~~~~~(2)\\
# #
# where :math:`N_i^r` denotes the set of neighbor indices of node :math:`i` # where :math:`N_i^r` denotes the set of neighbor indices of node :math:`i`
# under relation :math:`r\in R` and :math:`c_{i,r}` is a normalization # under relation :math:`r\in R` and :math:`c_{i,r}` is a normalization
# constant. In entity classification, the R-GCN paper uses # constant. In entity classification, the R-GCN paper uses
# :math:`c_{i,r}=|N_i^r|`. # :math:`c_{i,r}=|N_i^r|`.
# #
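#
# To make equation :math:`(2)` concrete, here is a naive dense sketch of one
# layer, written only for illustration (``A_r`` is a stack of per-relation
# adjacency matrices and ``W`` holds one weight matrix per relation; these
# names are illustrative, not part of the DGL API):
#
# ::
#
#     # h: (N, d_in), A_r: (R, N, N), W: (R, d_in, d_out), W0: (d_in, d_out)
#     out = h @ W0                                        # self-loop term W_0 h_i
#     for r in range(num_rels):
#         c = A_r[r].sum(dim=1, keepdim=True).clamp(min=1)  # c_{i,r} = |N_i^r|
#         out = out + (A_r[r] @ h @ W[r]) / c
#     h_next = torch.relu(out)
#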
# The problem of applying the above equation directly is the rapid growth of # The problem of applying the above equation directly is the rapid growth of
# the number of parameters, especially with highly multi-relational data. In # the number of parameters, especially with highly multi-relational data. In
# order to reduce model parameter size and prevent overfitting, the original # order to reduce model parameter size and prevent overfitting, the original
# paper proposes to use basis decomposition. # paper proposes to use basis decomposition.
# #
# .. math:: W_r^{(l)}=\sum\limits_{b=1}^B a_{rb}^{(l)}V_b^{(l)}~~~~~~~~~~(3)\\ # .. math:: W_r^{(l)}=\sum\limits_{b=1}^B a_{rb}^{(l)}V_b^{(l)}~~~~~~~~~~(3)\\
# #
# Therefore, the weight :math:`W_r^{(l)}` is a linear combination of basis
# transformations :math:`V_b^{(l)}` with coefficients :math:`a_{rb}^{(l)}`.
# The number of bases :math:`B` is much smaller than the number of relations # The number of bases :math:`B` is much smaller than the number of relations
# in the knowledge base. # in the knowledge base.
# #
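#
# As a small illustration of equation :math:`(3)`: given basis matrices ``V``
# of shape ``(num_bases, in_feat, out_feat)`` and coefficients ``a`` of shape
# ``(num_rels, num_bases)`` (hypothetical names), all per-relation weights can
# be recovered with one tensor contraction:
#
# ::
#
#     # W[r] = sum_b a[r, b] * V[b]   ->   (num_rels, in_feat, out_feat)
#     W = torch.einsum("rb,bio->rio", a, V)
#
# The ``RGCNLayer`` below implements this recombination with ``view`` and
# ``matmul``.
#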
# .. note:: # .. note::
# Another weight regularization, block-decomposition, is implemented in # Another weight regularization, block-decomposition, is implemented in
# the `link prediction <link-prediction_>`_. # the `link prediction <link-prediction_>`_.
# #
# Implement R-GCN in DGL # Implement R-GCN in DGL
# ---------------------- # ----------------------
# #
# An R-GCN model is composed of several R-GCN layers. The first R-GCN layer
# also serves as the input layer: it takes in features (for example, description
# texts) associated with each node entity and projects them to a hidden space.
# In this tutorial, we use only the entity ID as the entity feature.
# #
# R-GCN layers # R-GCN layers
# ~~~~~~~~~~~~ # ~~~~~~~~~~~~
# #
# For each node, an R-GCN layer performs the following steps: # For each node, an R-GCN layer performs the following steps:
# #
# - Compute outgoing message using node representation and weight matrix # - Compute outgoing message using node representation and weight matrix
# associated with the edge type (message function) # associated with the edge type (message function)
# - Aggregate incoming messages and generate new node representations (reduce # - Aggregate incoming messages and generate new node representations (reduce
# and apply function) # and apply function)
# #
# The following code is the definition of an R-GCN hidden layer. # The following code is the definition of an R-GCN hidden layer.
# #
# .. note:: # .. note::
# Each relation type is associated with a different weight. Therefore, # Each relation type is associated with a different weight. Therefore,
# the full weight matrix has three dimensions: relation, input_feature, # the full weight matrix has three dimensions: relation, input_feature,
# output_feature. # output_feature.
# #
# .. note:: # .. note::
# #
# This section shows how to implement an R-GCN from scratch. DGL provides a more
# efficient :class:`builtin R-GCN layer module <dgl.nn.pytorch.conv.RelGraphConv>`. # efficient :class:`builtin R-GCN layer module <dgl.nn.pytorch.conv.RelGraphConv>`.
# #
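#
# For reference, a minimal sketch with the builtin module might look like the
# following; the exact signature can differ between DGL versions, so treat it
# as an illustration rather than canonical usage (``g``, ``feat``, ``etypes``,
# and ``num_rels`` stand for a homogeneous graph, node features, per-edge
# relation type IDs, and the relation count):
#
# ::
#
#     import dgl.nn as dglnn
#
#     conv = dglnn.RelGraphConv(
#         in_feat=16, out_feat=16, num_rels=num_rels,
#         regularizer="basis", num_bases=4)
#     h = conv(g, feat, etypes)   # etypes: per-edge relation type IDs
#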
import os import os
os.environ['DGLBACKEND'] = 'pytorch'
import dgl os.environ["DGLBACKEND"] = "pytorch"
import torch from functools import partial
import torch.nn as nn
import torch.nn.functional as F import dgl
from dgl import DGLGraph import dgl.function as fn
import dgl.function as fn import torch
from functools import partial import torch.nn as nn
import torch.nn.functional as F
class RGCNLayer(nn.Module): from dgl import DGLGraph
def __init__(self, in_feat, out_feat, num_rels, num_bases=-1, bias=None,
activation=None, is_input_layer=False):
super(RGCNLayer, self).__init__() class RGCNLayer(nn.Module):
self.in_feat = in_feat def __init__(
self.out_feat = out_feat self,
self.num_rels = num_rels in_feat,
self.num_bases = num_bases out_feat,
self.bias = bias num_rels,
self.activation = activation num_bases=-1,
self.is_input_layer = is_input_layer bias=None,
activation=None,
# sanity check is_input_layer=False,
if self.num_bases <= 0 or self.num_bases > self.num_rels: ):
self.num_bases = self.num_rels super(RGCNLayer, self).__init__()
self.in_feat = in_feat
# weight bases in equation (3) self.out_feat = out_feat
self.weight = nn.Parameter(torch.Tensor(self.num_bases, self.in_feat, self.num_rels = num_rels
self.out_feat)) self.num_bases = num_bases
if self.num_bases < self.num_rels: self.bias = bias
# linear combination coefficients in equation (3) self.activation = activation
self.w_comp = nn.Parameter(torch.Tensor(self.num_rels, self.num_bases)) self.is_input_layer = is_input_layer
# add bias # sanity check
if self.bias: if self.num_bases <= 0 or self.num_bases > self.num_rels:
self.bias = nn.Parameter(torch.Tensor(out_feat)) self.num_bases = self.num_rels
# weight bases in equation (3)
# init trainable parameters self.weight = nn.Parameter(
nn.init.xavier_uniform_(self.weight, torch.Tensor(self.num_bases, self.in_feat, self.out_feat)
gain=nn.init.calculate_gain('relu')) )
if self.num_bases < self.num_rels: if self.num_bases < self.num_rels:
nn.init.xavier_uniform_(self.w_comp, # linear combination coefficients in equation (3)
gain=nn.init.calculate_gain('relu')) self.w_comp = nn.Parameter(
if self.bias: torch.Tensor(self.num_rels, self.num_bases)
nn.init.xavier_uniform_(self.bias, )
gain=nn.init.calculate_gain('relu')) # add bias
if self.bias:
def forward(self, g): self.bias = nn.Parameter(torch.Tensor(out_feat))
if self.num_bases < self.num_rels: # init trainable parameters
# generate all weights from bases (equation (3)) nn.init.xavier_uniform_(
weight = self.weight.view(self.in_feat, self.num_bases, self.out_feat) self.weight, gain=nn.init.calculate_gain("relu")
weight = torch.matmul(self.w_comp, weight).view(self.num_rels, )
self.in_feat, self.out_feat) if self.num_bases < self.num_rels:
else: nn.init.xavier_uniform_(
weight = self.weight self.w_comp, gain=nn.init.calculate_gain("relu")
)
if self.is_input_layer: if self.bias:
def message_func(edges): nn.init.xavier_uniform_(
# for input layer, matrix multiply can be converted to be self.bias, gain=nn.init.calculate_gain("relu")
# an embedding lookup using source node id )
embed = weight.view(-1, self.out_feat)
index = edges.data[dgl.ETYPE] * self.in_feat + edges.src['id'] def forward(self, g):
return {'msg': embed[index] * edges.data['norm']} if self.num_bases < self.num_rels:
else: # generate all weights from bases (equation (3))
def message_func(edges): weight = self.weight.view(
w = weight[edges.data[dgl.ETYPE]] self.in_feat, self.num_bases, self.out_feat
msg = torch.bmm(edges.src['h'].unsqueeze(1), w).squeeze() )
msg = msg * edges.data['norm'] weight = torch.matmul(self.w_comp, weight).view(
return {'msg': msg} self.num_rels, self.in_feat, self.out_feat
)
def apply_func(nodes): else:
h = nodes.data['h'] weight = self.weight
if self.bias: if self.is_input_layer:
h = h + self.bias
if self.activation: def message_func(edges):
h = self.activation(h) # for input layer, matrix multiply can be converted to be
return {'h': h} # an embedding lookup using source node id
embed = weight.view(-1, self.out_feat)
g.update_all(message_func, fn.sum(msg='msg', out='h'), apply_func) index = edges.data[dgl.ETYPE] * self.in_feat + edges.src["id"]
return {"msg": embed[index] * edges.data["norm"]}
############################################################################### else:
# Full R-GCN model defined
# ~~~~~~~~~~~~~~~~~~~~~~~ def message_func(edges):
w = weight[edges.data[dgl.ETYPE]]
class Model(nn.Module): msg = torch.bmm(edges.src["h"].unsqueeze(1), w).squeeze()
def __init__(self, num_nodes, h_dim, out_dim, num_rels, msg = msg * edges.data["norm"]
num_bases=-1, num_hidden_layers=1): return {"msg": msg}
super(Model, self).__init__()
self.num_nodes = num_nodes def apply_func(nodes):
self.h_dim = h_dim h = nodes.data["h"]
self.out_dim = out_dim if self.bias:
self.num_rels = num_rels h = h + self.bias
self.num_bases = num_bases if self.activation:
self.num_hidden_layers = num_hidden_layers h = self.activation(h)
return {"h": h}
# create rgcn layers
self.build_model() g.update_all(message_func, fn.sum(msg="msg", out="h"), apply_func)
# create initial features
self.features = self.create_features() ###############################################################################
# Full R-GCN model defined
def build_model(self): # ~~~~~~~~~~~~~~~~~~~~~~~
self.layers = nn.ModuleList()
# input to hidden
i2h = self.build_input_layer() class Model(nn.Module):
self.layers.append(i2h) def __init__(
# hidden to hidden self,
for _ in range(self.num_hidden_layers): num_nodes,
h2h = self.build_hidden_layer() h_dim,
self.layers.append(h2h) out_dim,
# hidden to output num_rels,
h2o = self.build_output_layer() num_bases=-1,
self.layers.append(h2o) num_hidden_layers=1,
):
# initialize feature for each node super(Model, self).__init__()
def create_features(self): self.num_nodes = num_nodes
features = torch.arange(self.num_nodes) self.h_dim = h_dim
return features self.out_dim = out_dim
self.num_rels = num_rels
def build_input_layer(self): self.num_bases = num_bases
return RGCNLayer(self.num_nodes, self.h_dim, self.num_rels, self.num_bases, self.num_hidden_layers = num_hidden_layers
activation=F.relu, is_input_layer=True)
# create rgcn layers
def build_hidden_layer(self): self.build_model()
return RGCNLayer(self.h_dim, self.h_dim, self.num_rels, self.num_bases,
activation=F.relu) # create initial features
self.features = self.create_features()
def build_output_layer(self):
return RGCNLayer(self.h_dim, self.out_dim, self.num_rels, self.num_bases, def build_model(self):
activation=partial(F.softmax, dim=1)) self.layers = nn.ModuleList()
# input to hidden
def forward(self, g): i2h = self.build_input_layer()
if self.features is not None: self.layers.append(i2h)
g.ndata['id'] = self.features # hidden to hidden
for layer in self.layers: for _ in range(self.num_hidden_layers):
layer(g) h2h = self.build_hidden_layer()
return g.ndata.pop('h') self.layers.append(h2h)
# hidden to output
############################################################################### h2o = self.build_output_layer()
# Handle dataset self.layers.append(h2o)
# ~~~~~~~~~~~~~~~~
# This tutorial uses Institute for Applied Informatics and Formal Description Methods (AIFB) dataset from R-GCN paper. # initialize feature for each node
def create_features(self):
# load graph data features = torch.arange(self.num_nodes)
dataset = dgl.data.rdf.AIFBDataset() return features
g = dataset[0]
category = dataset.predict_category def build_input_layer(self):
train_mask = g.nodes[category].data.pop('train_mask') return RGCNLayer(
test_mask = g.nodes[category].data.pop('test_mask') self.num_nodes,
train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze() self.h_dim,
test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze() self.num_rels,
labels = g.nodes[category].data.pop('label') self.num_bases,
num_rels = len(g.canonical_etypes) activation=F.relu,
num_classes = dataset.num_classes is_input_layer=True,
# normalization factor )
for cetype in g.canonical_etypes:
g.edges[cetype].data['norm'] = dgl.norm_by_dst(g, cetype).unsqueeze(1) def build_hidden_layer(self):
category_id = g.ntypes.index(category) return RGCNLayer(
self.h_dim,
############################################################################### self.h_dim,
# Create graph and model self.num_rels,
# ~~~~~~~~~~~~~~~~~~~~~~~ self.num_bases,
activation=F.relu,
# configurations )
n_hidden = 16 # number of hidden units
n_bases = -1 # use number of relations as number of bases def build_output_layer(self):
n_hidden_layers = 0 # use 1 input layer, 1 output layer, no hidden layer return RGCNLayer(
n_epochs = 25 # epochs to train self.h_dim,
lr = 0.01 # learning rate self.out_dim,
l2norm = 0 # L2 norm coefficient self.num_rels,
self.num_bases,
# create graph activation=partial(F.softmax, dim=1),
g = dgl.to_homogeneous(g, edata=['norm']) )
node_ids = torch.arange(g.num_nodes())
target_idx = node_ids[g.ndata[dgl.NTYPE] == category_id] def forward(self, g):
if self.features is not None:
# create model g.ndata["id"] = self.features
model = Model(g.num_nodes(), for layer in self.layers:
n_hidden, layer(g)
num_classes, return g.ndata.pop("h")
num_rels,
num_bases=n_bases,
num_hidden_layers=n_hidden_layers) ###############################################################################
# Handle dataset
############################################################################### # ~~~~~~~~~~~~~~~~
# Training loop # This tutorial uses Institute for Applied Informatics and Formal Description Methods (AIFB) dataset from R-GCN paper.
# ~~~~~~~~~~~~~~~~
# load graph data
# optimizer dataset = dgl.data.rdf.AIFBDataset()
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm) g = dataset[0]
category = dataset.predict_category
print("start training...") train_mask = g.nodes[category].data.pop("train_mask")
model.train() test_mask = g.nodes[category].data.pop("test_mask")
for epoch in range(n_epochs): train_idx = torch.nonzero(train_mask, as_tuple=False).squeeze()
optimizer.zero_grad() test_idx = torch.nonzero(test_mask, as_tuple=False).squeeze()
logits = model.forward(g) labels = g.nodes[category].data.pop("label")
logits = logits[target_idx] num_rels = len(g.canonical_etypes)
loss = F.cross_entropy(logits[train_idx], labels[train_idx]) num_classes = dataset.num_classes
loss.backward() # normalization factor
for cetype in g.canonical_etypes:
optimizer.step() g.edges[cetype].data["norm"] = dgl.norm_by_dst(g, cetype).unsqueeze(1)
category_id = g.ntypes.index(category)
train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx])
train_acc = train_acc.item() / len(train_idx) ###############################################################################
val_loss = F.cross_entropy(logits[test_idx], labels[test_idx]) # Create graph and model
val_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]) # ~~~~~~~~~~~~~~~~~~~~~~~
val_acc = val_acc.item() / len(test_idx)
print("Epoch {:05d} | ".format(epoch) + # configurations
"Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format( n_hidden = 16 # number of hidden units
train_acc, loss.item()) + n_bases = -1 # use number of relations as number of bases
"Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format( n_hidden_layers = 0 # use 1 input layer, 1 output layer, no hidden layer
val_acc, val_loss.item())) n_epochs = 25 # epochs to train
lr = 0.01 # learning rate
############################################################################### l2norm = 0 # L2 norm coefficient
# .. _link-prediction:
# # create graph
# The second task, link prediction g = dgl.to_homogeneous(g, edata=["norm"])
# -------------------------------- node_ids = torch.arange(g.num_nodes())
# So far, you have seen how to use DGL to implement entity classification with an target_idx = node_ids[g.ndata[dgl.NTYPE] == category_id]
# R-GCN model. In the knowledge base setting, representation generated by
# R-GCN can be used to uncover potential relationships between nodes. In the # create model
# R-GCN paper, the authors feed the entity representations generated by R-GCN model = Model(
# into the `DistMult <https://arxiv.org/pdf/1412.6575.pdf>`_ prediction model g.num_nodes(),
# to predict possible relationships. n_hidden,
# num_classes,
# The implementation is similar to that presented here, but with an extra DistMult layer num_rels,
# stacked on top of the R-GCN layers. You can find the complete num_bases=n_bases,
# implementation of link prediction with R-GCN in our `Github Python code num_hidden_layers=n_hidden_layers,
# example <https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/link.py>`_. )
###############################################################################
# Training loop
# ~~~~~~~~~~~~~~~~
# optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2norm)
print("start training...")
model.train()
for epoch in range(n_epochs):
optimizer.zero_grad()
logits = model.forward(g)
logits = logits[target_idx]
loss = F.cross_entropy(logits[train_idx], labels[train_idx])
loss.backward()
optimizer.step()
train_acc = torch.sum(logits[train_idx].argmax(dim=1) == labels[train_idx])
train_acc = train_acc.item() / len(train_idx)
val_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
val_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx])
val_acc = val_acc.item() / len(test_idx)
print(
"Epoch {:05d} | ".format(epoch)
+ "Train Accuracy: {:.4f} | Train Loss: {:.4f} | ".format(
train_acc, loss.item()
)
+ "Validation Accuracy: {:.4f} | Validation loss: {:.4f}".format(
val_acc, val_loss.item()
)
)
###############################################################################
# .. _link-prediction:
#
# The second task, link prediction
# --------------------------------
# So far, you have seen how to use DGL to implement entity classification with an
# R-GCN model. In the knowledge base setting, representation generated by
# R-GCN can be used to uncover potential relationships between nodes. In the
# R-GCN paper, the authors feed the entity representations generated by R-GCN
# into the `DistMult <https://arxiv.org/pdf/1412.6575.pdf>`_ prediction model
# to predict possible relationships.
#
# The implementation is similar to that presented here, but with an extra DistMult layer
# stacked on top of the R-GCN layers. You can find the complete
# implementation of link prediction with R-GCN in our `Github Python code
# example <https://github.com/dmlc/dgl/blob/master/examples/pytorch/rgcn/link.py>`_.
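#
# As a rough sketch of the scoring step only (not the full training pipeline),
# a DistMult score for a triple :math:`(s, r, o)` can be computed from the
# R-GCN entity embeddings ``h`` and a learned relation embedding matrix
# ``w_rel`` (illustrative names, not taken from the linked example):
#
# ::
#
#     # h: (num_nodes, d), w_rel: (num_rels, d); s, r, o are index tensors
#     def distmult_score(h, w_rel, s, r, o):
#         return (h[s] * w_rel[r] * h[o]).sum(dim=-1)
#
# Higher scores indicate more plausible triples; training contrasts observed
# triples against negatively sampled corruptions.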
...@@ -14,612 +14,640 @@ Line Graph Neural Network ...@@ -14,612 +14,640 @@ Line Graph Neural Network
efficiency. For recommended implementation, please refer to the `official efficiency. For recommended implementation, please refer to the `official
examples <https://github.com/dmlc/dgl/tree/master/examples>`_. examples <https://github.com/dmlc/dgl/tree/master/examples>`_.
""" """
########################################################################################### ###########################################################################################
# #
# In this tutorial, you learn how to solve community detection tasks by implementing a line # In this tutorial, you learn how to solve community detection tasks by implementing a line
# graph neural network (LGNN). Community detection, or graph clustering, consists of partitioning
# the vertices in a graph into clusters in which nodes are more similar to one
# another than to nodes in other clusters.
# #
# In the :doc:`Graph convolutional network tutorial <1_gcn>`, you learned how to classify the nodes of an input
# graph in a semi-supervised setting. You used a graph convolutional neural network (GCN) # graph in a semi-supervised setting. You used a graph convolutional neural network (GCN)
# as an embedding mechanism for graph features. # as an embedding mechanism for graph features.
# #
# To generalize a graph neural network (GNN) into supervised community detection, a line-graph based # To generalize a graph neural network (GNN) into supervised community detection, a line-graph based
# variation of GNN is introduced in the research paper # variation of GNN is introduced in the research paper
# `Supervised Community Detection with Line Graph Neural Networks <https://arxiv.org/abs/1705.08415>`__. # `Supervised Community Detection with Line Graph Neural Networks <https://arxiv.org/abs/1705.08415>`__.
# One of the highlights of the model is # One of the highlights of the model is
# to augment the straightforward GNN architecture so that it operates on # to augment the straightforward GNN architecture so that it operates on
# a line graph of edge adjacencies, defined with a non-backtracking operator. # a line graph of edge adjacencies, defined with a non-backtracking operator.
# #
# A line graph neural network (LGNN) shows how DGL can implement an advanced graph algorithm by # A line graph neural network (LGNN) shows how DGL can implement an advanced graph algorithm by
# mixing basic tensor operations, sparse-matrix multiplication, and message- # mixing basic tensor operations, sparse-matrix multiplication, and message-
# passing APIs. # passing APIs.
# #
# In the following sections, you learn about community detection, line # In the following sections, you learn about community detection, line
# graphs, LGNN, and its implementation. # graphs, LGNN, and its implementation.
# #
# Supervised community detection task with the Cora dataset # Supervised community detection task with the Cora dataset
# ----------------------------------------------------------
# Community detection # Community detection
# ~~~~~~~~~~~~~~~~~~~~ # ~~~~~~~~~~~~~~~~~~~~
# In a community detection task, you cluster similar nodes instead of
# labeling them. Node similarity is typically expressed as a higher density of
# edges within each cluster than between clusters.
# #
# What's the difference between community detection and node classification? # What's the difference between community detection and node classification?
# Compared with node classification, community detection focuses on retrieving
# cluster information from the graph, rather than assigning a specific label to
# a node. For example, as long as a node is clustered with its community
# members, it doesn't matter whether the node is called "community A"
# or "community B", whereas assigning all "great movies" the label "bad movies"
# would be a disaster in a movie network classification task.
# #
# What's the difference, then, between a community detection algorithm and
# another clustering algorithm such as k-means? A community detection algorithm operates on
# graph-structured data. Unlike k-means, community detection leverages the
# graph structure, instead of simply clustering nodes based on their
# features.
# #
# Cora dataset # Cora dataset
# ~~~~~~~~~~~~
# To be consistent with the GCN tutorial, # To be consistent with the GCN tutorial,
# you use the `Cora dataset <https://linqs.soe.ucsc.edu/data>`__ # you use the `Cora dataset <https://linqs.soe.ucsc.edu/data>`__
# to illustrate a simple community detection task. Cora is a scientific publication dataset, # to illustrate a simple community detection task. Cora is a scientific publication dataset,
# with 2708 papers belonging to seven # with 2708 papers belonging to seven
# different machine learning fields. Here, you formulate Cora as a # different machine learning fields. Here, you formulate Cora as a
# directed graph, with each node being a paper, and each edge being a # directed graph, with each node being a paper, and each edge being a
# citation link (A->B means A cites B). Here is a visualization of the whole # citation link (A->B means A cites B). Here is a visualization of the whole
# Cora dataset. # Cora dataset.
# #
# .. figure:: https://i.imgur.com/X404Byc.png # .. figure:: https://i.imgur.com/X404Byc.png
# :alt: cora # :alt: cora
# :height: 400px # :height: 400px
# :width: 500px # :width: 500px
# :align: center # :align: center
# #
# Cora naturally contains seven classes, and the statistics below show that each
# class does satisfy our notion of a community, i.e. nodes of the same class
# have a higher probability of connecting to one another than to nodes of a different class.
# The following code snippet verifies that there are more intra-class edges # The following code snippet verifies that there are more intra-class edges
# than inter-class. # than inter-class.
import os import os
os.environ['DGLBACKEND'] = 'pytorch'
import torch os.environ["DGLBACKEND"] = "pytorch"
import torch as th import dgl
import torch.nn as nn import torch
import torch.nn.functional as F import torch as th
import torch.nn as nn
import dgl import torch.nn.functional as F
from dgl.data import citation_graph as citegrh from dgl.data import citation_graph as citegrh
data = citegrh.load_cora() data = citegrh.load_cora()
G = data[0] G = data[0]
labels = th.tensor(G.ndata['label']) labels = th.tensor(G.ndata["label"])
# find all the nodes labeled with class 0 # find all the nodes labeled with class 0
label0_nodes = th.nonzero(labels == 0, as_tuple=False).squeeze() label0_nodes = th.nonzero(labels == 0, as_tuple=False).squeeze()
# find all the edges pointing to class 0 nodes # find all the edges pointing to class 0 nodes
src, _ = G.in_edges(label0_nodes) src, _ = G.in_edges(label0_nodes)
src_labels = labels[src] src_labels = labels[src]
# find all the edges whose both endpoints are in class 0 # find all the edges whose both endpoints are in class 0
intra_src = th.nonzero(src_labels == 0, as_tuple=False) intra_src = th.nonzero(src_labels == 0, as_tuple=False)
print('Intra-class edges percent: %.4f' % (len(intra_src) / len(src_labels))) print("Intra-class edges percent: %.4f" % (len(intra_src) / len(src_labels)))
########################################################################################### import matplotlib.pyplot as plt
# Binary community subgraph from Cora with a test dataset
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ###########################################################################################
# Without loss of generality, in this tutorial you limit the scope of the # Binary community subgraph from Cora with a test dataset
# task to binary community detection. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# # Without loss of generality, in this tutorial you limit the scope of the
# .. note:: # task to binary community detection.
# #
# To create a practice binary-community dataset from Cora, first extract # .. note::
# all two-class pairs from the original Cora seven classes. For each pair, you #
# treat each class as one community, and find the largest subgraph that # To create a practice binary-community dataset from Cora, first extract
# at least contains one cross-community edge as the training example. As # all two-class pairs from the original Cora seven classes. For each pair, you
# a result, there are a total of 21 training samples in this small dataset. # treat each class as one community, and find the largest subgraph that
# # at least contains one cross-community edge as the training example. As
# With the following code, you can visualize one of the training samples and its community structure. # a result, there are a total of 21 training samples in this small dataset.
#
import networkx as nx # With the following code, you can visualize one of the training samples and its community structure.
import matplotlib.pyplot as plt
import networkx as nx
train_set = dgl.data.CoraBinary()
G1, pmpd1, label1 = train_set[1] train_set = dgl.data.CoraBinary()
nx_G1 = G1.to_networkx() G1, pmpd1, label1 = train_set[1]
nx_G1 = G1.to_networkx()
def visualize(labels, g):
pos = nx.spring_layout(g, seed=1)
plt.figure(figsize=(8, 8)) def visualize(labels, g):
plt.axis('off') pos = nx.spring_layout(g, seed=1)
nx.draw_networkx(g, pos=pos, node_size=50, cmap=plt.get_cmap('coolwarm'), plt.figure(figsize=(8, 8))
node_color=labels, edge_color='k', plt.axis("off")
arrows=False, width=0.5, style='dotted', with_labels=False) nx.draw_networkx(
visualize(label1, nx_G1) g,
pos=pos,
########################################################################################### node_size=50,
# To learn more, go the original research paper to see how to generalize cmap=plt.get_cmap("coolwarm"),
# to multiple communities case. node_color=labels,
# edge_color="k",
# Community detection in a supervised setting arrows=False,
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ width=0.5,
# The community detection problem could be tackled with both supervised and style="dotted",
# unsupervised approaches. You can formulate with_labels=False,
# community detection in a supervised setting as follows: )
#
# - Each training example consists of :math:`(G, L)`, where :math:`G` is a
# directed graph :math:`(V, E)`. For each node :math:`v` in :math:`V`, we visualize(label1, nx_G1)
# assign a ground truth community label :math:`z_v \in \{0,1\}`.
# - The parameterized model :math:`f(G, \theta)` predicts a label set ###########################################################################################
# :math:`\tilde{Z} = f(G)` for nodes :math:`V`. # To learn more, go the original research paper to see how to generalize
# - For each example :math:`(G,L)`, the model learns to minimize a specially # to multiple communities case.
# designed loss function (equivariant loss) :math:`L_{equivariant} = #
# (\tilde{Z},Z)` # Community detection in a supervised setting
# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# .. note:: # The community detection problem could be tackled with both supervised and
# # unsupervised approaches. You can formulate
# In this supervised setting, the model naturally predicts a label for # community detection in a supervised setting as follows:
# each community. However, community assignment should be equivariant to #
# label permutations. To achieve this, in each forward process, we take # - Each training example consists of :math:`(G, L)`, where :math:`G` is a
# the minimum among losses calculated from all possible permutations of # directed graph :math:`(V, E)`. For each node :math:`v` in :math:`V`, we
# labels. # assign a ground truth community label :math:`z_v \in \{0,1\}`.
# # - The parameterized model :math:`f(G, \theta)` predicts a label set
# Mathematically, this means # :math:`\tilde{Z} = f(G)` for nodes :math:`V`.
# :math:`L_{equivariant} = \underset{\pi \in S_c} {min}-\log(\hat{\pi}, \pi)`, # - For each example :math:`(G,L)`, the model learns to minimize a specially
# where :math:`S_c` is the set of all permutations of labels, and # designed loss function (equivariant loss) :math:`L_{equivariant} =
# :math:`\hat{\pi}` is the set of predicted labels, # (\tilde{Z},Z)`
# :math:`- \log(\hat{\pi},\pi)` denotes negative log likelihood. #
# # .. note::
# For instance, for a sample graph with node :math:`\{1,2,3,4\}` and #
# community assignment :math:`\{A, A, A, B\}`, with each node's label # In this supervised setting, the model naturally predicts a label for
# :math:`l \in \{0,1\}`,The group of all possible permutations # each community. However, community assignment should be equivariant to
# :math:`S_c = \{\{0,0,0,1\}, \{1,1,1,0\}\}`. # label permutations. To achieve this, in each forward process, we take
# # the minimum among losses calculated from all possible permutations of
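#
# A minimal sketch of this permutation-invariant loss for the binary case
# (only two label orderings to consider; written for illustration):
#
# ::
#
#     # logits: (num_nodes, 2), labels: (num_nodes,) with values in {0, 1}
#     loss = th.min(F.cross_entropy(logits, labels),
#                   F.cross_entropy(logits, 1 - labels))
#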
# Line graph neural network key ideas # labels.
# ------------------------------------ #
# An key innovation in this topic is the use of a line graph. # Mathematically, this means
# Unlike models in previous tutorials, message passing happens not only on the # :math:`L_{equivariant} = \underset{\pi \in S_c} {min}-\log(\hat{\pi}, \pi)`,
# original graph, e.g. the binary community subgraph from Cora, but also on the # where :math:`S_c` is the set of all permutations of labels, and
# line graph associated with the original graph. # :math:`\hat{\pi}` is the set of predicted labels,
# # :math:`- \log(\hat{\pi},\pi)` denotes negative log likelihood.
# What is a line-graph? #
# ~~~~~~~~~~~~~~~~~~~~~ # For instance, for a sample graph with node :math:`\{1,2,3,4\}` and
# In graph theory, line graph is a graph representation that encodes the # community assignment :math:`\{A, A, A, B\}`, with each node's label
# edge adjacency structure in the original graph. # :math:`l \in \{0,1\}`,The group of all possible permutations
# # :math:`S_c = \{\{0,0,0,1\}, \{1,1,1,0\}\}`.
# Specifically, a line-graph :math:`L(G)` turns an edge of the original graph `G` #
# into a node. This is illustrated with the graph below (taken from the # Line graph neural network key ideas
# research paper). # ------------------------------------
# # An key innovation in this topic is the use of a line graph.
# .. figure:: https://i.imgur.com/4WO5jEm.png # Unlike models in previous tutorials, message passing happens not only on the
# :alt: lg # original graph, e.g. the binary community subgraph from Cora, but also on the
# :align: center # line graph associated with the original graph.
# #
# Here, :math:`e_{A}:= (i\rightarrow j)` and :math:`e_{B}:= (j\rightarrow k)` # What is a line-graph?
# are two edges in the original graph :math:`G`. In line graph :math:`G_L`, # ~~~~~~~~~~~~~~~~~~~~~
# they correspond to nodes :math:`v^{l}_{A}, v^{l}_{B}`. # In graph theory, line graph is a graph representation that encodes the
# # edge adjacency structure in the original graph.
# The next natural question is, how to connect nodes in line-graph? How to #
# connect two edges? Here, we use the following connection rule: # Specifically, a line-graph :math:`L(G)` turns an edge of the original graph `G`
# # into a node. This is illustrated with the graph below (taken from the
# Two nodes :math:`v^{l}_{A}`, :math:`v^{l}_{B}` in `lg` are connected if # research paper).
# the corresponding two edges :math:`e_{A}, e_{B}` in `g` share one and only #
# one node: # .. figure:: https://i.imgur.com/4WO5jEm.png
# :math:`e_{A}`'s destination node is :math:`e_{B}`'s source node # :alt: lg
# (:math:`j`). # :align: center
# #
# .. note:: # Here, :math:`e_{A}:= (i\rightarrow j)` and :math:`e_{B}:= (j\rightarrow k)`
# # are two edges in the original graph :math:`G`. In line graph :math:`G_L`,
# Mathematically, this definition corresponds to a notion called non-backtracking # they correspond to nodes :math:`v^{l}_{A}, v^{l}_{B}`.
# operator: #
# :math:`B_{(i \rightarrow j), (\hat{i} \rightarrow \hat{j})}` # The next natural question is, how to connect nodes in line-graph? How to
# :math:`= \begin{cases} # connect two edges? Here, we use the following connection rule:
# 1 \text{ if } j = \hat{i}, \hat{j} \neq i\\ #
# 0 \text{ otherwise} \end{cases}` # Two nodes :math:`v^{l}_{A}`, :math:`v^{l}_{B}` in `lg` are connected if
# where an edge is formed if :math:`B_{node1, node2} = 1`. # the corresponding two edges :math:`e_{A}, e_{B}` in `g` share one and only
# # one node:
# # :math:`e_{A}`'s destination node is :math:`e_{B}`'s source node
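#
# As a quick check of the definition (a sketch using the same API the training
# section uses later), you can build the non-backtracking line graph of a tiny
# directed graph and confirm it has one node per edge of the original graph:
#
# ::
#
#     g_demo = dgl.graph(([0, 1, 2], [1, 2, 0]))       # directed 3-cycle
#     lg_demo = g_demo.line_graph(backtracking=False)
#     print(g_demo.num_edges(), lg_demo.num_nodes())   # 3 3
#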
# One layer in LGNN, algorithm structure # (:math:`j`).
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
# # .. note::
# LGNN chains together a series of line graph neural network layers. The graph #
# representation :math:`x` and its line graph companion :math:`y` evolve with # Mathematically, this definition corresponds to a notion called non-backtracking
# the dataflow as follows. # operator:
# # :math:`B_{(i \rightarrow j), (\hat{i} \rightarrow \hat{j})}`
# .. figure:: https://i.imgur.com/bZGGIGp.png # :math:`= \begin{cases}
# :alt: alg # 1 \text{ if } j = \hat{i}, \hat{j} \neq i\\
# :align: center # 0 \text{ otherwise} \end{cases}`
# # where an edge is formed if :math:`B_{node1, node2} = 1`.
# At the :math:`k`-th layer, the :math:`i`-th neuron of the :math:`l`-th #
# channel updates its embedding :math:`x^{(k+1)}_{i,l}` with: #
# # One layer in LGNN, algorithm structure
# .. math:: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# \begin{split} #
# x^{(k+1)}_{i,l} ={}&\rho[x^{(k)}_{i}\theta^{(k)}_{1,l} # LGNN chains together a series of line graph neural network layers. The graph
# +(Dx^{(k)})_{i}\theta^{(k)}_{2,l} \\ # representation :math:`x` and its line graph companion :math:`y` evolve with
# &+\sum^{J-1}_{j=0}(A^{2^{j}}x^{k})_{i}\theta^{(k)}_{3+j,l}\\ # the dataflow as follows.
# &+[\{\text{Pm},\text{Pd}\}y^{(k)}]_{i}\theta^{(k)}_{3+J,l}] \\ #
# &+\text{skip-connection} # .. figure:: https://i.imgur.com/bZGGIGp.png
# \qquad i \in V, l = 1,2,3, ... b_{k+1}/2 # :alt: alg
# \end{split} # :align: center
# #
# Then, the line-graph representation :math:`y^{(k+1)}_{i,l}` with, # At the :math:`k`-th layer, the :math:`i`-th neuron of the :math:`l`-th
# # channel updates its embedding :math:`x^{(k+1)}_{i,l}` with:
# .. math:: #
# # .. math::
# \begin{split} # \begin{split}
# y^{(k+1)}_{i',l^{'}} = {}&\rho[y^{(k)}_{i^{'}}\gamma^{(k)}_{1,l^{'}}+ # x^{(k+1)}_{i,l} ={}&\rho[x^{(k)}_{i}\theta^{(k)}_{1,l}
# (D_{L(G)}y^{(k)})_{i^{'}}\gamma^{(k)}_{2,l^{'}}\\ # +(Dx^{(k)})_{i}\theta^{(k)}_{2,l} \\
# &+\sum^{J-1}_{j=0}(A_{L(G)}^{2^{j}}y^{k})_{i}\gamma^{(k)}_{3+j,l^{'}}\\ # &+\sum^{J-1}_{j=0}(A^{2^{j}}x^{k})_{i}\theta^{(k)}_{3+j,l}\\
# &+[\{\text{Pm},\text{Pd}\}^{T}x^{(k+1)}]_{i^{'}}\gamma^{(k)}_{3+J,l^{'}}]\\ # &+[\{\text{Pm},\text{Pd}\}y^{(k)}]_{i}\theta^{(k)}_{3+J,l}] \\
# &+\text{skip-connection} # &+\text{skip-connection}
# \qquad i^{'} \in V_{l}, l^{'} = 1,2,3, ... b^{'}_{k+1}/2 # \qquad i \in V, l = 1,2,3, ... b_{k+1}/2
# \end{split} # \end{split}
# #
# Where :math:`\text{skip-connection}` refers to performing the same operation without the non-linearity # Then, the line-graph representation :math:`y^{(k+1)}_{i,l}` with,
# :math:`\rho`, and with linear projection :math:`\theta_\{\frac{b_{k+1}}{2} + 1, ..., b_{k+1}-1, b_{k+1}\}` #
# and :math:`\gamma_\{\frac{b_{k+1}}{2} + 1, ..., b_{k+1}-1, b_{k+1}\}`. # .. math::
# #
# Implement LGNN in DGL # \begin{split}
# --------------------- # y^{(k+1)}_{i',l^{'}} = {}&\rho[y^{(k)}_{i^{'}}\gamma^{(k)}_{1,l^{'}}+
# Even though the equations in the previous section might seem intimidating, # (D_{L(G)}y^{(k)})_{i^{'}}\gamma^{(k)}_{2,l^{'}}\\
# it helps to understand the following information before you implement the LGNN. # &+\sum^{J-1}_{j=0}(A_{L(G)}^{2^{j}}y^{k})_{i}\gamma^{(k)}_{3+j,l^{'}}\\
# # &+[\{\text{Pm},\text{Pd}\}^{T}x^{(k+1)}]_{i^{'}}\gamma^{(k)}_{3+J,l^{'}}]\\
# The two equations are symmetric and can be implemented as two instances # &+\text{skip-connection}
# of the same class with different parameters. # \qquad i^{'} \in V_{l}, l^{'} = 1,2,3, ... b^{'}_{k+1}/2
# The first equation operates on graph representation :math:`x`, # \end{split}
# whereas the second operates on line-graph #
# representation :math:`y`. Let us denote this abstraction as :math:`f`. Then # Where :math:`\text{skip-connection}` refers to performing the same operation without the non-linearity
# the first is :math:`f(x,y; \theta_x)`, and the second # :math:`\rho`, and with linear projection :math:`\theta_\{\frac{b_{k+1}}{2} + 1, ..., b_{k+1}-1, b_{k+1}\}`
# is :math:`f(y,x, \theta_y)`. That is, they are parameterized to compute # and :math:`\gamma_\{\frac{b_{k+1}}{2} + 1, ..., b_{k+1}-1, b_{k+1}\}`.
# representations of the original graph and its #
# companion line graph, respectively. # Implement LGNN in DGL
# # ---------------------
# Each equation consists of four terms. Take the first one as an example, which follows. # Even though the equations in the previous section might seem intimidating,
# # it helps to understand the following information before you implement the LGNN.
# - :math:`x^{(k)}\theta^{(k)}_{1,l}`, a linear projection of previous #
# layer's output :math:`x^{(k)}`, denote as :math:`\text{prev}(x)`. # The two equations are symmetric and can be implemented as two instances
# - :math:`(Dx^{(k)})\theta^{(k)}_{2,l}`, a linear projection of degree # of the same class with different parameters.
# operator on :math:`x^{(k)}`, denote as :math:`\text{deg}(x)`. # The first equation operates on graph representation :math:`x`,
# - :math:`\sum^{J-1}_{j=0}(A^{2^{j}}x^{(k)})\theta^{(k)}_{3+j,l}`, # whereas the second operates on line-graph
# a summation of :math:`2^{j}` adjacency operator on :math:`x^{(k)}`, # representation :math:`y`. Let us denote this abstraction as :math:`f`. Then
# denote as :math:`\text{radius}(x)` # the first is :math:`f(x,y; \theta_x)`, and the second
# - :math:`[\{Pm,Pd\}y^{(k)}]\theta^{(k)}_{3+J,l}`, fusing another # is :math:`f(y,x, \theta_y)`. That is, they are parameterized to compute
# graph's embedding information using incidence matrix # representations of the original graph and its
# :math:`\{Pm, Pd\}`, followed with a linear projection, # companion line graph, respectively.
# denote as :math:`\text{fuse}(y)`. #
# # Each equation consists of four terms. Take the first one as an example, which follows.
# Each of the terms are performed again with different #
# parameters, and without the nonlinearity after the sum. # - :math:`x^{(k)}\theta^{(k)}_{1,l}`, a linear projection of previous
# Therefore, :math:`f` could be written as: # layer's output :math:`x^{(k)}`, denote as :math:`\text{prev}(x)`.
# # - :math:`(Dx^{(k)})\theta^{(k)}_{2,l}`, a linear projection of degree
# .. math:: # operator on :math:`x^{(k)}`, denote as :math:`\text{deg}(x)`.
# \begin{split} # - :math:`\sum^{J-1}_{j=0}(A^{2^{j}}x^{(k)})\theta^{(k)}_{3+j,l}`,
# f(x^{(k)},y^{(k)}) = {}\rho[&\text{prev}(x^{(k-1)}) + \text{deg}(x^{(k-1)}) +\text{radius}(x^{k-1}) # a summation of :math:`2^{j}` adjacency operator on :math:`x^{(k)}`,
# +\text{fuse}(y^{(k)})]\\ # denote as :math:`\text{radius}(x)`
# +&\text{prev}(x^{(k-1)}) + \text{deg}(x^{(k-1)}) +\text{radius}(x^{k-1}) +\text{fuse}(y^{(k)}) # - :math:`[\{Pm,Pd\}y^{(k)}]\theta^{(k)}_{3+J,l}`, fusing another
# \end{split} # graph's embedding information using incidence matrix
# # :math:`\{Pm, Pd\}`, followed with a linear projection,
# Two equations are chained-up in the following order: # denote as :math:`\text{fuse}(y)`.
# #
# .. math:: # Each of the terms are performed again with different
# \begin{split} # parameters, and without the nonlinearity after the sum.
# x^{(k+1)} = {}& f(x^{(k)}, y^{(k)})\\ # Therefore, :math:`f` could be written as:
# y^{(k+1)} = {}& f(y^{(k)}, x^{(k+1)}) #
# \end{split} # .. math::
# # \begin{split}
# Keep in mind the listed observations in this overview and proceed to implementation. # f(x^{(k)},y^{(k)}) = {}\rho[&\text{prev}(x^{(k-1)}) + \text{deg}(x^{(k-1)}) +\text{radius}(x^{k-1})
# An important point is that you use different strategies for the noted terms. # +\text{fuse}(y^{(k)})]\\
# # +&\text{prev}(x^{(k-1)}) + \text{deg}(x^{(k-1)}) +\text{radius}(x^{k-1}) +\text{fuse}(y^{(k)})
# .. note:: # \end{split}
# You can understand :math:`\{Pm, Pd\}` more thoroughly with this explanation. #
# Roughly speaking, there is a relationship between how :math:`g` and # Two equations are chained-up in the following order:
# :math:`lg` (the line graph) work together with loopy brief propagation. #
# Here, you implement :math:`\{Pm, Pd\}` as a SciPy COO sparse matrix in the dataset, # .. math::
# and stack them as tensors when batching. Another batching solution is to # \begin{split}
# treat :math:`\{Pm, Pd\}` as the adjacency matrix of a bipartite graph, which maps # x^{(k+1)} = {}& f(x^{(k)}, y^{(k)})\\
# line graph's feature to graph's, and vice versa. # y^{(k+1)} = {}& f(y^{(k)}, x^{(k+1)})
# # \end{split}
# Implementing :math:`\text{prev}` and :math:`\text{deg}` as tensor operation #
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # Keep in mind the listed observations in this overview and proceed to implementation.
# Linear projection and degree operation are both simply matrix # An important point is that you use different strategies for the noted terms.
# multiplication. Write them as PyTorch tensor operations. #
# # .. note::
# In ``__init__``, you define the projection variables. # You can understand :math:`\{Pm, Pd\}` more thoroughly with this explanation.
# # Roughly speaking, there is a relationship between how :math:`g` and
# :: # :math:`lg` (the line graph) work together with loopy brief propagation.
# # Here, you implement :math:`\{Pm, Pd\}` as a SciPy COO sparse matrix in the dataset,
# self.linear_prev = nn.Linear(in_feats, out_feats) # and stack them as tensors when batching. Another batching solution is to
# self.linear_deg = nn.Linear(in_feats, out_feats) # treat :math:`\{Pm, Pd\}` as the adjacency matrix of a bipartite graph, which maps
# # line graph's feature to graph's, and vice versa.
# #
# In ``forward()``, :math:`\text{prev}` and :math:`\text{deg}` are the same # Implementing :math:`\text{prev}` and :math:`\text{deg}` as tensor operation
# as any other PyTorch tensor operations. # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# # Linear projection and degree operation are both simply matrix
# :: # multiplication. Write them as PyTorch tensor operations.
# #
# prev_proj = self.linear_prev(feat_a) # In ``__init__``, you define the projection variables.
# deg_proj = self.linear_deg(deg * feat_a) #
# # ::
# Implementing :math:`\text{radius}` as message passing in DGL #
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # self.linear_prev = nn.Linear(in_feats, out_feats)
# As discussed in GCN tutorial, you can formulate one adjacency operator as # self.linear_deg = nn.Linear(in_feats, out_feats)
# doing one-step message passing. As a generalization, :math:`2^j` adjacency #
# operations can be formulated as performing :math:`2^j` step of message #
# passing. Therefore, the summation is equivalent to summing nodes' # In ``forward()``, :math:`\text{prev}` and :math:`\text{deg}` are the same
# representation of :math:`2^j, j=0, 1, 2..` step message passing, i.e. # as any other PyTorch tensor operations.
# gathering information in :math:`2^{j}` neighborhood of each node. #
# # ::
# In ``__init__``, define the projection variables used in each #
# :math:`2^j` steps of message passing. # prev_proj = self.linear_prev(feat_a)
# # deg_proj = self.linear_deg(deg * feat_a)
# :: #
# # Implementing :math:`\text{radius}` as message passing in DGL
# self.linear_radius = nn.ModuleList( # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# [nn.Linear(in_feats, out_feats) for i in range(radius)]) # As discussed in GCN tutorial, you can formulate one adjacency operator as
# # doing one-step message passing. As a generalization, :math:`2^j` adjacency
# In ``__forward__``, use following function ``aggregate_radius()`` to # operations can be formulated as performing :math:`2^j` step of message
# gather data from multiple hops. This can be seen in the following code. # passing. Therefore, the summation is equivalent to summing nodes'
# Note that the ``update_all`` is called multiple times. # representation of :math:`2^j, j=0, 1, 2..` step message passing, i.e.
# gathering information in :math:`2^{j}` neighborhood of each node.
# Return a list containing features gathered from multiple radius. #
import dgl.function as fn # In ``__init__``, define the projection variables used in each
def aggregate_radius(radius, g, z): # :math:`2^j` steps of message passing.
# initializing list to collect message passing result #
z_list = [] # ::
g.ndata['z'] = z #
# pulling message from 1-hop neighbourhood # self.linear_radius = nn.ModuleList(
g.update_all(fn.copy_u(u='z', out='m'), fn.sum(msg='m', out='z')) # [nn.Linear(in_feats, out_feats) for i in range(radius)])
z_list.append(g.ndata['z']) #
for i in range(radius - 1): # In ``__forward__``, use following function ``aggregate_radius()`` to
for j in range(2 ** i): # gather data from multiple hops. This can be seen in the following code.
#pulling message from 2^j neighborhood # Note that the ``update_all`` is called multiple times.
g.update_all(fn.copy_u(u='z', out='m'), fn.sum(msg='m', out='z'))
z_list.append(g.ndata['z']) # Return a list containing features gathered from multiple radius.
return z_list import dgl.function as fn
#########################################################################
# Implementing :math:`\text{fuse}` as sparse matrix multiplication def aggregate_radius(radius, g, z):
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # initializing list to collect message passing result
# :math:`\{Pm, Pd\}` is a sparse matrix with only two non-zero entries on z_list = []
# each column. Therefore, you construct it as a sparse matrix in the dataset, g.ndata["z"] = z
# and implement :math:`\text{fuse}` as a sparse matrix multiplication. # pulling message from 1-hop neighbourhood
# g.update_all(fn.copy_u(u="z", out="m"), fn.sum(msg="m", out="z"))
# in ``__forward__``: z_list.append(g.ndata["z"])
# for i in range(radius - 1):
# :: for j in range(2**i):
# # pulling message from 2^j neighborhood
# fuse = self.linear_fuse(th.mm(pm_pd, feat_b)) g.update_all(fn.copy_u(u="z", out="m"), fn.sum(msg="m", out="z"))
# z_list.append(g.ndata["z"])
# Completing :math:`f(x, y)` return z_list
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Finally, the following shows how to sum up all the terms together, pass it to skip connection, and
# batch norm. #########################################################################
# # Implementing :math:`\text{fuse}` as sparse matrix multiplication
# :: # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# # :math:`\{Pm, Pd\}` is a sparse matrix with only two non-zero entries on
# result = prev_proj + deg_proj + radius_proj + fuse # each column. Therefore, you construct it as a sparse matrix in the dataset,
# # and implement :math:`\text{fuse}` as a sparse matrix multiplication.
# Pass result to skip connection. #
# # in ``__forward__``:
# :: #
# # ::
# result = th.cat([result[:, :n], F.relu(result[:, n:])], 1) #
# # fuse = self.linear_fuse(th.mm(pm_pd, feat_b))
# Then pass the result to batch norm. #
# # Completing :math:`f(x, y)`
# :: # ~~~~~~~~~~~~~~~~~~~~~~~~~~
# # Finally, the following shows how to sum up all the terms together, pass it to skip connection, and
# result = self.bn(result) #Batch Normalization. # batch norm.
# #
# # ::
# Here is the complete code for one LGNN layer's abstraction :math:`f(x,y)` #
class LGNNCore(nn.Module): # result = prev_proj + deg_proj + radius_proj + fuse
def __init__(self, in_feats, out_feats, radius): #
super(LGNNCore, self).__init__() # Pass result to skip connection.
self.out_feats = out_feats #
self.radius = radius # ::
#
self.linear_prev = nn.Linear(in_feats, out_feats) # result = th.cat([result[:, :n], F.relu(result[:, n:])], 1)
self.linear_deg = nn.Linear(in_feats, out_feats) #
self.linear_radius = nn.ModuleList( # Then pass the result to batch norm.
[nn.Linear(in_feats, out_feats) for i in range(radius)]) #
self.linear_fuse = nn.Linear(in_feats, out_feats) # ::
self.bn = nn.BatchNorm1d(out_feats) #
# result = self.bn(result) #Batch Normalization.
def forward(self, g, feat_a, feat_b, deg, pm_pd): #
# term "prev" #
prev_proj = self.linear_prev(feat_a) # Here is the complete code for one LGNN layer's abstraction :math:`f(x,y)`
# term "deg" class LGNNCore(nn.Module):
deg_proj = self.linear_deg(deg * feat_a) def __init__(self, in_feats, out_feats, radius):
super(LGNNCore, self).__init__()
# term "radius" self.out_feats = out_feats
# aggregate 2^j-hop features self.radius = radius
hop2j_list = aggregate_radius(self.radius, g, feat_a)
# apply linear transformation self.linear_prev = nn.Linear(in_feats, out_feats)
hop2j_list = [linear(x) for linear, x in zip(self.linear_radius, hop2j_list)] self.linear_deg = nn.Linear(in_feats, out_feats)
radius_proj = sum(hop2j_list) self.linear_radius = nn.ModuleList(
[nn.Linear(in_feats, out_feats) for i in range(radius)]
# term "fuse" )
fuse = self.linear_fuse(th.mm(pm_pd, feat_b)) self.linear_fuse = nn.Linear(in_feats, out_feats)
self.bn = nn.BatchNorm1d(out_feats)
# sum them together
result = prev_proj + deg_proj + radius_proj + fuse def forward(self, g, feat_a, feat_b, deg, pm_pd):
# term "prev"
# skip connection and batch norm prev_proj = self.linear_prev(feat_a)
n = self.out_feats // 2 # term "deg"
result = th.cat([result[:, :n], F.relu(result[:, n:])], 1) deg_proj = self.linear_deg(deg * feat_a)
result = self.bn(result)
# term "radius"
return result # aggregate 2^j-hop features
hop2j_list = aggregate_radius(self.radius, g, feat_a)
############################################################################################################## # apply linear transformation
# Chain-up LGNN abstractions as an LGNN layer hop2j_list = [
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ linear(x) for linear, x in zip(self.linear_radius, hop2j_list)
# To implement: ]
# radius_proj = sum(hop2j_list)
# .. math::
# \begin{split} # term "fuse"
# x^{(k+1)} = {}& f(x^{(k)}, y^{(k)})\\ fuse = self.linear_fuse(th.mm(pm_pd, feat_b))
# y^{(k+1)} = {}& f(y^{(k)}, x^{(k+1)})
# \end{split} # sum them together
# result = prev_proj + deg_proj + radius_proj + fuse
# Chain-up two ``LGNNCore`` instances, as in the example code, with different parameters in the forward pass.
class LGNNLayer(nn.Module): # skip connection and batch norm
def __init__(self, in_feats, out_feats, radius): n = self.out_feats // 2
super(LGNNLayer, self).__init__() result = th.cat([result[:, :n], F.relu(result[:, n:])], 1)
self.g_layer = LGNNCore(in_feats, out_feats, radius) result = self.bn(result)
self.lg_layer = LGNNCore(in_feats, out_feats, radius)
return result
def forward(self, g, lg, x, lg_x, deg_g, deg_lg, pm_pd):
next_x = self.g_layer(g, x, lg_x, deg_g, pm_pd)
##############################################################################################################
# Chain-up LGNN abstractions as an LGNN layer
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# To implement:
#
# .. math::
#
#    \begin{split}
#    x^{(k+1)} = {}& f(x^{(k)}, y^{(k)})\\
#    y^{(k+1)} = {}& f(y^{(k)}, x^{(k+1)})
#    \end{split}
#
# Chain-up two ``LGNNCore`` instances, as in the example code, with different parameters in the forward pass.
class LGNNLayer(nn.Module):
    def __init__(self, in_feats, out_feats, radius):
        super(LGNNLayer, self).__init__()
        self.g_layer = LGNNCore(in_feats, out_feats, radius)
        self.lg_layer = LGNNCore(in_feats, out_feats, radius)

    def forward(self, g, lg, x, lg_x, deg_g, deg_lg, pm_pd):
        next_x = self.g_layer(g, x, lg_x, deg_g, pm_pd)
        pm_pd_y = th.transpose(pm_pd, 0, 1)
        next_lg_x = self.lg_layer(lg, lg_x, x, deg_lg, pm_pd_y)
        return next_x, next_lg_x


########################################################################################
# Chain-up LGNN layers
# ~~~~~~~~~~~~~~~~~~~~
# Define an LGNN with three hidden layers, as in the following example.
class LGNN(nn.Module):
    def __init__(self, radius):
        super(LGNN, self).__init__()
        self.layer1 = LGNNLayer(1, 16, radius)  # input is scalar feature
        self.layer2 = LGNNLayer(16, 16, radius)  # hidden size is 16
        self.layer3 = LGNNLayer(16, 16, radius)
        self.linear = nn.Linear(16, 2)  # predict two classes

    def forward(self, g, lg, pm_pd):
        # compute the degrees
        deg_g = g.in_degrees().float().unsqueeze(1)
        deg_lg = lg.in_degrees().float().unsqueeze(1)
        # use degree as the input feature
        x, lg_x = deg_g, deg_lg
        x, lg_x = self.layer1(g, lg, x, lg_x, deg_g, deg_lg, pm_pd)
        x, lg_x = self.layer2(g, lg, x, lg_x, deg_g, deg_lg, pm_pd)
        x, lg_x = self.layer3(g, lg, x, lg_x, deg_g, deg_lg, pm_pd)
        return self.linear(x)

#########################################################################################
# Training and inference
# -----------------------
# First load the data.
from torch.utils.data import DataLoader

training_loader = DataLoader(
    train_set, batch_size=1, collate_fn=train_set.collate_fn, drop_last=True
)

#######################################################################################
# Next, define the main training loop. Note that each training sample contains
# three objects: a :class:`~dgl.DGLGraph`, a SciPy sparse matrix ``pmpd``, and a label
# array in ``numpy.ndarray``. Generate the line graph by using this command:
#
# ::
#
#    lg = g.line_graph(backtracking=False)
#
# Note that ``backtracking=False`` is required to correctly simulate the non-backtracking
# operation. We also define a utility function to convert the SciPy sparse matrix to
# a torch sparse tensor.

# Create the model
model = LGNN(radius=3)
# define the optimizer
optimizer = th.optim.Adam(model.parameters(), lr=1e-2)

# A utility function to convert a scipy.coo_matrix to torch.SparseFloat
def sparse2th(mat):
    value = mat.data
    indices = th.LongTensor([mat.row, mat.col])
    tensor = th.sparse.FloatTensor(
        indices, th.from_numpy(value).float(), mat.shape
    )
    return tensor
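#######################################################################################
# A side note, not part of the original example: ``th.sparse.FloatTensor`` is a legacy
# constructor in recent PyTorch releases. A minimal sketch of the same conversion with
# ``torch.sparse_coo_tensor`` could look like this (``sparse2th_coo`` is a hypothetical
# name):
#
# ::
#
#    def sparse2th_coo(mat):
#        # mat is a scipy.sparse.coo_matrix with .row, .col, and .data fields
#        indices = th.LongTensor([mat.row, mat.col])
#        values = th.from_numpy(mat.data).float()
#        return th.sparse_coo_tensor(indices, values, mat.shape)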

# Train for 20 epochs
for i in range(20):
    all_loss = []
    all_acc = []
    for [g, pmpd, label] in training_loader:
        # Generate the line graph.
        lg = g.line_graph(backtracking=False)
        # Create torch tensors
        pmpd = sparse2th(pmpd)
        label = th.from_numpy(label)
        # Forward
        z = model(g, lg, pmpd)

        # Calculate loss:
        # Since there are only two communities, there are only two permutations
        # of the community labels.
        loss_perm1 = F.cross_entropy(z, label)
        loss_perm2 = F.cross_entropy(z, 1 - label)
        loss = th.min(loss_perm1, loss_perm2)

        # Calculate accuracy:
        _, pred = th.max(z, 1)
        acc_perm1 = (pred == label).float().mean()
        acc_perm2 = (pred == 1 - label).float().mean()
        acc = th.max(acc_perm1, acc_perm2)
        all_loss.append(loss.item())
        all_acc.append(acc.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    niters = len(all_loss)
    print(
        "Epoch %d | loss %.4f | accuracy %.4f"
        % (i, sum(all_loss) / niters, sum(all_acc) / niters)
    )
#######################################################################################
# Visualize training progress
# -----------------------------
# You can visualize the network's community prediction on one training example,
# together with the ground truth. Start this with the following code example.

pmpd1 = sparse2th(pmpd1)
LG1 = G1.line_graph(backtracking=False)
z = model(G1, LG1, pmpd1)
_, pred = th.max(z, 1)
visualize(pred, nx_G1)

#######################################################################################
# Compare this with the ground truth. Note that the colors of the two communities
# might be swapped, because the model only needs to predict the partitioning correctly.
visualize(label1, nx_G1)

#########################################
# Here is an animation to better understand the process. (40 epochs)
#
# .. figure:: https://i.imgur.com/KDUyE1S.gif
#    :alt: lgnn-anim
#
# Batching graphs for parallelism
# --------------------------------
#
# LGNN takes a collection of different graphs.
# You might consider whether batching can be used for parallelism.
#
# Batching has been built into the data loader itself.
# In the ``collate_fn`` for the PyTorch data loader, graphs are batched using the
# ``dgl.batch`` API. ``dgl.batch`` merges the graphs
# into one large graph, with each smaller graph's adjacency matrix being a block
# along the diagonal of the large graph's adjacency matrix. The :math:`\{Pm, Pd\}`
# matrices are concatenated into block-diagonal matrices, in correspondence with the
# DGL batched graph API.
def collate_fn(batch):
    graphs, pmpds, labels = zip(*batch)
    batched_graphs = dgl.batch(graphs)
    batched_pmpds = sp.block_diag(pmpds)
    batched_labels = np.concatenate(labels, axis=0)
    return batched_graphs, batched_pmpds, batched_labels
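######################################################################################
# As a hypothetical usage sketch (not part of the original example), this ``collate_fn``
# would be passed to a PyTorch ``DataLoader`` so that every iteration yields one merged
# graph, one block-diagonal ``pmpd`` matrix, and one concatenated label array:
#
# ::
#
#    batched_loader = DataLoader(
#        train_set, batch_size=16, collate_fn=collate_fn, drop_last=True
#    )
#    for batched_graph, batched_pmpd, batched_label in batched_loader:
#        # batched_graph.batch_size == 16; dgl.unbatch(batched_graph) recovers the parts
#        pass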
######################################################################################
# You can find the complete code on GitHub at
# `Community Detection with Graph Neural Networks (CDGNN) <https://github.com/dmlc/dgl/tree/master/examples/pytorch/line_graph>`_.
@@ -105,9 +105,8 @@ structure-free normalization, in the style of attention.
# subpackage. Simply import the ``GATConv`` as follows.
import os

os.environ["DGLBACKEND"] = "pytorch"
###############################################################
# Readers can skip the following step-by-step explanation of the implementation and
# jump to the `Put everything together`_ for training and visualization results.
@@ -125,6 +124,7 @@ from dgl.nn.pytorch import GATConv
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv


class GATLayer(nn.Module):
@@ -139,37 +139,38 @@ class GATLayer(nn.Module):
    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        gain = nn.init.calculate_gain("relu")
        nn.init.xavier_normal_(self.fc.weight, gain=gain)
        nn.init.xavier_normal_(self.attn_fc.weight, gain=gain)

    def edge_attention(self, edges):
        # edge UDF for equation (2)
        z2 = torch.cat([edges.src["z"], edges.dst["z"]], dim=1)
        a = self.attn_fc(z2)
        return {"e": F.leaky_relu(a)}

    def message_func(self, edges):
        # message UDF for equation (3) & (4)
        return {"z": edges.src["z"], "e": edges.data["e"]}

    def reduce_func(self, nodes):
        # reduce UDF for equation (3) & (4)
        # equation (3)
        alpha = F.softmax(nodes.mailbox["e"], dim=1)
        # equation (4)
        h = torch.sum(alpha * nodes.mailbox["z"], dim=1)
        return {"h": h}

    def forward(self, h):
        # equation (1)
        z = self.fc(h)
        self.g.ndata["z"] = z
        # equation (2)
        self.g.apply_edges(self.edge_attention)
        # equation (3) & (4)
        self.g.update_all(self.message_func, self.reduce_func)
        return self.g.ndata.pop("h")
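##################################################################
# A small usage sketch, added here for illustration only: ``GATLayer`` binds the
# graph ``g`` at construction time, so its forward pass takes only node features.
#
# ::
#
#    layer = GATLayer(g, features.size()[1], 8)
#    h = layer(features)  # one attention head; shape: (num_nodes, 8)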
##################################################################
# Equation (1)
@@ -195,11 +196,13 @@ class GATLayer(nn.Module):
# ``apply_edges`` API. The argument to ``apply_edges`` is an **Edge UDF**,
# which is defined below:


def edge_attention(self, edges):
    # edge UDF for equation (2)
    z2 = torch.cat([edges.src["z"], edges.dst["z"]], dim=1)
    a = self.attn_fc(z2)
    return {"e": F.leaky_relu(a)}
#########################################################################
# Here, the dot product with the learnable weight vector :math:`\vec{a^{(l)}}`
@@ -229,13 +232,15 @@ def edge_attention(self, edges):
# Both tasks first fetch data from the mailbox and then manipulate it on the
# second dimension (``dim=1``), on which the messages are batched.


def reduce_func(self, nodes):
    # reduce UDF for equation (3) & (4)
    # equation (3)
    alpha = F.softmax(nodes.mailbox["e"], dim=1)
    # equation (4)
    h = torch.sum(alpha * nodes.mailbox["z"], dim=1)
    return {"h": h}
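#########################################################################
# As an aside, and only a sketch on our part: equations (3) and (4) can also be
# written with DGL's built-in ``edge_softmax`` and message-passing functions instead
# of a hand-written reduce UDF, assuming ``g.ndata["z"]`` and ``g.edata["e"]`` have
# already been populated as above:
#
# ::
#
#    import dgl.function as fn
#    from dgl.nn.functional import edge_softmax
#
#    g.edata["a"] = edge_softmax(g, g.edata["e"])                # equation (3)
#    g.update_all(fn.u_mul_e("z", "a", "m"), fn.sum("m", "h"))   # equation (4)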
#####################################################################
# Multi-head attention
@@ -258,8 +263,9 @@ def reduce_func(self, nodes):
# Use the above defined single-head ``GATLayer`` as the building block
# for the ``MultiHeadGATLayer`` below:


class MultiHeadGATLayer(nn.Module):
    def __init__(self, g, in_dim, out_dim, num_heads, merge="cat"):
        super(MultiHeadGATLayer, self).__init__()
        self.heads = nn.ModuleList()
        for i in range(num_heads):
@@ -268,19 +274,21 @@ class MultiHeadGATLayer(nn.Module):
    def forward(self, h):
        head_outs = [attn_head(h) for attn_head in self.heads]
        if self.merge == "cat":
            # concat on the output feature dimension (dim=1)
            return torch.cat(head_outs, dim=1)
        else:
            # merge using average over the head dimension
            return torch.mean(torch.stack(head_outs), dim=0)
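##########################################################################
# A quick shape note (our own addition): with ``merge="cat"`` the ``num_heads``
# outputs, each of shape ``(N, out_dim)``, are concatenated into
# ``(N, num_heads * out_dim)``, while the averaging branch keeps the shape at
# ``(N, out_dim)``.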
###########################################################################
# Put everything together
# ^^^^^^^^^^^^^^^^^^^^^^^
#
# Now, you can define a two-layer GAT model.


class GAT(nn.Module):
    def __init__(self, g, in_dim, hidden_dim, out_dim, num_heads):
        super(GAT, self).__init__()
@@ -296,33 +304,34 @@ class GAT(nn.Module):
        h = self.layer2(h)
        return h


#############################################################################
# We then load the Cora dataset using DGL's built-in data module.

from dgl import DGLGraph
from dgl.data import citation_graph as citegrh
import networkx as nx


def load_cora_data():
    data = citegrh.load_cora()
    g = data[0]
    mask = torch.BoolTensor(g.ndata["train_mask"])
    return g, g.ndata["feat"], g.ndata["label"], mask


##############################################################################
# The training loop is exactly the same as in the GCN tutorial.

import time
import numpy as np

g, features, labels, mask = load_cora_data()

# create the model, 2 heads, each head has hidden size 8
net = GAT(g, in_dim=features.size()[1], hidden_dim=8, out_dim=7, num_heads=2)

# create optimizer
optimizer = torch.optim.Adam(net.parameters(), lr=1e-3)
@@ -344,8 +353,11 @@ for epoch in range(30):
    if epoch >= 3:
        dur.append(time.time() - t0)

    print(
        "Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), np.mean(dur)
        )
    )
#########################################################################
# Visualizing and understanding attention learned