Commit 596ca471 authored by GaiYu0

Merge branch 'cpp' of https://github.com/jermainewang/dgl into line-graph

parents 52ed6a45 72f63455
# -*- coding: utf-8 -*-
#
# Configuration file for the Sphinx documentation builder.
#
# This file does only contain a selection of the most common options. For a
# full list see the documentation:
# http://www.sphinx-doc.org/en/master/config
# -- Path setup --------------------------------------------------------------
# If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../../python'))
# -- Project information -----------------------------------------------------
project = 'DGL'
copyright = '2018, DGL Team'
author = 'DGL Team'
# The short X.Y version
version = '0.0.1'
# The full version, including alpha/beta/rc tags
release = '0.0.1'
# -- General configuration ---------------------------------------------------
# If your documentation needs a minimal Sphinx version, state it here.
#
# needs_sphinx = '1.0'
# Add any Sphinx extension module names here, as strings. They can be
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'sphinx.ext.autodoc',
'sphinx.ext.autosummary',
'sphinx.ext.coverage',
'sphinx.ext.mathjax',
'sphinx.ext.napoleon',
'sphinx.ext.viewcode',
]
# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']
# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
#
source_suffix = ['.rst', '.md']
# The master toctree document.
master_doc = 'index'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
#
# This is also used if you do content translation via gettext catalogs.
# Usually you set "language" from the command line for these cases.
language = None
# List of patterns, relative to source directory, that match files and
# directories to ignore when looking for source files.
# This pattern also affects html_static_path and html_extra_path.
exclude_patterns = []
# The name of the Pygments (syntax highlighting) style to use.
pygments_style = None
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Theme options are theme-specific and customize the look and feel of a theme
# further. For a list of options available for each theme, see the
# documentation.
#
# html_theme_options = {}
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
# Custom sidebar templates, must be a dictionary that maps document names
# to template names.
#
# The default sidebars (for documents that don't match any pattern) are
# defined by theme itself. Builtin themes are using these templates by
# default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
# 'searchbox.html']``.
#
# html_sidebars = {}
# -- Options for HTMLHelp output ---------------------------------------------
# Output file base name for HTML help builder.
htmlhelp_basename = 'dgldoc'
# -- Options for LaTeX output ------------------------------------------------
latex_elements = {
# The paper size ('letterpaper' or 'a4paper').
#
# 'papersize': 'letterpaper',
# The font size ('10pt', '11pt' or '12pt').
#
# 'pointsize': '10pt',
# Additional stuff for the LaTeX preamble.
#
# 'preamble': '',
# Latex figure (float) alignment
#
# 'figure_align': 'htbp',
}
# Grouping the document tree into LaTeX files. List of tuples
# (source start file, target name, title,
# author, documentclass [howto, manual, or own class]).
latex_documents = [
(master_doc, 'dgl.tex', 'dgl Documentation',
'DGL Team', 'manual'),
]
# -- Options for manual page output ------------------------------------------
# One entry per manual page. List of tuples
# (source start file, name, description, authors, manual section).
man_pages = [
(master_doc, 'dgl', 'dgl Documentation',
[author], 1)
]
# -- Options for Texinfo output ----------------------------------------------
# Grouping the document tree into Texinfo files. List of tuples
# (source start file, target name, title, author,
# dir menu entry, description, category)
texinfo_documents = [
(master_doc, 'dgl', 'dgl Documentation',
author, 'dgl', 'One line description of project.',
'Miscellaneous'),
]
# -- Options for Epub output -------------------------------------------------
# Bibliographic Dublin Core info.
epub_title = project
# The unique identifier of the text. This can be an ISBN number
# or the project homepage.
#
# epub_identifier = ''
# A unique identification for the text.
#
# epub_uid = ''
# A list of files that should not be packed into the epub file.
epub_exclude_files = ['search.html']
# -- Extension configuration -------------------------------------------------
.. DGL documentation master file, created by
sphinx-quickstart on Fri Oct 5 14:18:01 2018.
You can adapt this file completely to your liking, but it should at least
contain the root `toctree` directive.
Welcome to DGL's documentation!
===============================
.. toctree::
:maxdepth: 2
:caption: Contents:
Get Started
-----------
.. toctree::
:maxdepth: 2
install/index
tutorials/index
API Reference
-------------
.. toctree::
:maxdepth: 2
api/python/index
Index
-----
* :ref:`genindex`
Install DGL
============
At this stage, we recommend installing DGL from source. To quickly try out DGL and its demos/tutorials, check out `Install from docker`_.
Get the source code
-------------------
First, download the source code from GitHub. Note that you need the ``--recursive`` option to
also clone the submodules.
.. code:: bash
git clone --recursive https://github.com/jermainewang/dgl.git
Alternatively, you can clone the repository first and then run the following commands:
.. code:: bash
git submodule init
git submodule update
Build shared library
--------------------
Before building the library, please make sure the following dependencies are installed
(using Ubuntu as an example):
.. code:: bash
sudo apt-get update
sudo apt-get install -y python cmake build-essential
We use CMake (minimum version 2.8) to build the library.
.. code:: bash
mkdir build
cd build
cmake ..
make -j4
Build python binding
--------------------
DGL's Python binding depends on the following packages (tested versions):
* numpy (>= 1.14.0)
* scipy (>= 1.1.0)
* networkx (>= 2.1)
To install them, use the following command:
.. code:: bash
pip install --user numpy scipy networkx
There are several ways to set up DGL's Python binding. At the current stage, we recommend
that developers use environment variables to locate the Python packages.
.. code:: bash
export DGL_HOME=/path/to/dgl
export PYTHONPATH=$DGL_HOME/python:${PYTHONPATH}
export DGL_LIBRARY_PATH=$DGL_HOME/build
The ``DGL_LIBRARY_PATH`` variable lets the Python package locate the shared library
built above. Use the following command to test whether the installation succeeded.
.. code:: bash
python -c 'import dgl'
Install from docker
-------------------
TBD
Tutorials
=========
TBD: Get started on DGL
......@@ -2,6 +2,8 @@
Graph Attention Networks
Paper: https://arxiv.org/abs/1710.10903
Code: https://github.com/PetarV-/GAT
GAT with batch processing
"""
import argparse
......@@ -10,6 +12,7 @@ import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
......@@ -22,15 +25,15 @@ class GATReduce(nn.Module):
self.attn_drop = attn_drop
def forward(self, node, msgs):
a1 = torch.unsqueeze(node['a1'], 0) # shape (1, 1)
a2 = torch.cat([torch.unsqueeze(m['a2'], 0) for m in msgs], dim=0) # shape (deg, 1)
ft = torch.cat([torch.unsqueeze(m['ft'], 0) for m in msgs], dim=0) # shape (deg, D)
a1 = torch.unsqueeze(node['a1'], 1) # shape (B, 1, 1)
a2 = msgs['a2'] # shape (B, deg, 1)
ft = msgs['ft'] # shape (B, deg, D)
# attention
a = a1 + a2 # shape (deg, 1)
e = F.softmax(F.leaky_relu(a), dim=0)
a = a1 + a2 # shape (B, deg, 1)
e = F.softmax(F.leaky_relu(a), dim=1)
if self.attn_drop != 0.0:
e = F.dropout(e, self.attn_drop)
return {'accum' : torch.sum(e * ft, dim=0)} # shape (D,)
return {'accum' : torch.sum(e * ft, dim=1)} # shape (B, D)
class GATFinalize(nn.Module):
def __init__(self, headid, indim, hiddendim, activation, residual):
......@@ -71,7 +74,7 @@ class GATPrepare(nn.Module):
class GAT(nn.Module):
def __init__(self,
nx_graph,
g,
num_layers,
in_dim,
num_hidden,
......@@ -82,8 +85,8 @@ class GAT(nn.Module):
attn_drop,
residual):
super(GAT, self).__init__()
self.g = DGLGraph(nx_graph)
self.num_layers = num_layers # one extra output projection
self.g = g
self.num_layers = num_layers
self.num_heads = num_heads
self.prp = nn.ModuleList()
self.red = nn.ModuleList()
......@@ -104,48 +107,39 @@ class GAT(nn.Module):
# output projection
self.prp.append(GATPrepare(num_hidden * num_heads, num_classes, in_drop))
self.red.append(GATReduce(attn_drop))
self.fnl.append(GATFinalize(0, num_hidden * num_heads, num_classes, activation, residual))
self.fnl.append(GATFinalize(0, num_hidden * num_heads,
num_classes, activation, residual))
# sanity check
assert len(self.prp) == self.num_layers * self.num_heads + 1
assert len(self.red) == self.num_layers * self.num_heads + 1
assert len(self.fnl) == self.num_layers * self.num_heads + 1
def forward(self, features, train_nodes):
def forward(self, features):
last = features
for l in range(self.num_layers):
for hid in range(self.num_heads):
i = l * self.num_heads + hid
# prepare
for n, h in last.items():
self.g.nodes[n].update(self.prp[i](h))
self.g.set_n_repr(self.prp[i](last))
# message passing
self.g.update_all(gat_message, self.red[i], self.fnl[i])
# merge all the heads
last = {}
for n in self.g.nodes():
last[n] = torch.cat(
[self.g.nodes[n]['head%d' % hid] for hid in range(self.num_heads)])
last = torch.cat(
[self.g.pop_n_repr('head%d' % hid) for hid in range(self.num_heads)],
dim=1)
# output projection
for n, h in last.items():
self.g.nodes[n].update(self.prp[-1](h))
self.g.set_n_repr(self.prp[-1](last))
self.g.update_all(gat_message, self.red[-1], self.fnl[-1])
return torch.cat([torch.unsqueeze(self.g.nodes[n]['head0'], 0) for n in train_nodes])
return self.g.pop_n_repr('head0')
def main(args):
# load and preprocess dataset
data = load_data(args)
# features of each samples
features = {}
labels = []
train_nodes = []
for n in data.graph.nodes():
features[n] = torch.FloatTensor(data.features[n, :])
if data.train_mask[n] == 1:
train_nodes.append(n)
labels.append(data.labels[n])
labels = torch.LongTensor(labels)
in_feats = data.features.shape[1]
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
mask = torch.ByteTensor(data.train_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
......@@ -154,11 +148,15 @@ def main(args):
else:
cuda = True
torch.cuda.set_device(args.gpu)
features = {k : v.cuda() for k, v in features.items()}
features = features.cuda()
labels = labels.cuda()
mask = mask.cuda()
# create GCN model
g = DGLGraph(data.graph)
# create model
model = GAT(data.graph,
model = GAT(g,
args.num_layers,
in_feats,
args.num_hidden,
......@@ -181,7 +179,7 @@ def main(args):
if epoch >= 3:
t0 = time.time()
# forward
logits = model(features, train_nodes)
logits = model(features)
logp = F.log_softmax(logits, 1)
loss = F.nll_loss(logp, labels)
......@@ -202,7 +200,7 @@ if __name__ == '__main__':
help="Which GPU to use. Set -1 to use CPU.")
parser.add_argument("--epochs", type=int, default=20,
help="number of training epochs")
parser.add_argument("--num-heads", type=int, default=8,
parser.add_argument("--num-heads", type=int, default=3,
help="number of attentional heads to use")
parser.add_argument("--num-layers", type=int, default=1,
help="number of hidden layers")
......
"""
Graph Attention Networks
Paper: https://arxiv.org/abs/1710.10903
Code: https://github.com/PetarV-/GAT
GAT with batch processing
"""
import argparse
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
def gat_message(src, edge):
return {'ft' : src['ft'], 'a2' : src['a2']}
class GATReduce(nn.Module):
def __init__(self, attn_drop):
super(GATReduce, self).__init__()
self.attn_drop = attn_drop
def forward(self, node, msgs):
a1 = torch.unsqueeze(node['a1'], 1) # shape (B, 1, 1)
a2 = msgs['a2'] # shape (B, deg, 1)
ft = msgs['ft'] # shape (B, deg, D)
# attention
a = a1 + a2 # shape (B, deg, 1)
e = F.softmax(F.leaky_relu(a), dim=1)
if self.attn_drop != 0.0:
e = F.dropout(e, self.attn_drop)
return {'accum' : torch.sum(e * ft, dim=1)} # shape (B, D)
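# Note: per attention head, this reduce step computes
#   e_ij    = softmax_j(leaky_relu(a1_i + a2_j))
#   accum_i = sum_j e_ij * ft_j
# over each destination node's incoming messages; the (B, deg, D) shape
# implies the B nodes handled in one call share the same in-degree deg.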
class GATFinalize(nn.Module):
def __init__(self, headid, indim, hiddendim, activation, residual):
super(GATFinalize, self).__init__()
self.headid = headid
self.activation = activation
self.residual = residual
self.residual_fc = None
if residual:
if indim != hiddendim:
self.residual_fc = nn.Linear(indim, hiddendim)
def forward(self, node):
ret = node['accum']
if self.residual:
if self.residual_fc is not None:
ret = self.residual_fc(node['h']) + ret
else:
ret = node['h'] + ret
return {'head%d' % self.headid : self.activation(ret)}
class GATPrepare(nn.Module):
def __init__(self, indim, hiddendim, drop):
super(GATPrepare, self).__init__()
self.fc = nn.Linear(indim, hiddendim)
self.drop = drop
self.attn_l = nn.Linear(hiddendim, 1)
self.attn_r = nn.Linear(hiddendim, 1)
def forward(self, feats):
h = feats
if self.drop != 0.0:
h = F.dropout(h, self.drop)
ft = self.fc(h)
a1 = self.attn_l(ft)
a2 = self.attn_r(ft)
return {'h' : h, 'ft' : ft, 'a1' : a1, 'a2' : a2}
class GAT(nn.Module):
def __init__(self,
g,
num_layers,
in_dim,
num_hidden,
num_classes,
num_heads,
activation,
in_drop,
attn_drop,
residual):
super(GAT, self).__init__()
self.g = g
self.num_layers = num_layers
self.num_heads = num_heads
self.prp = nn.ModuleList()
self.red = nn.ModuleList()
self.fnl = nn.ModuleList()
# input projection (no residual)
for hid in range(num_heads):
self.prp.append(GATPrepare(in_dim, num_hidden, in_drop))
self.red.append(GATReduce(attn_drop))
self.fnl.append(GATFinalize(hid, in_dim, num_hidden, activation, False))
# hidden layers
for l in range(num_layers - 1):
for hid in range(num_heads):
# due to multi-head, the in_dim = num_hidden * num_heads
self.prp.append(GATPrepare(num_hidden * num_heads, num_hidden, in_drop))
self.red.append(GATReduce(attn_drop))
self.fnl.append(GATFinalize(hid, num_hidden * num_heads,
num_hidden, activation, residual))
# output projection
self.prp.append(GATPrepare(num_hidden * num_heads, num_classes, in_drop))
self.red.append(GATReduce(attn_drop))
self.fnl.append(GATFinalize(0, num_hidden * num_heads,
num_classes, activation, residual))
# sanity check
assert len(self.prp) == self.num_layers * self.num_heads + 1
assert len(self.red) == self.num_layers * self.num_heads + 1
assert len(self.fnl) == self.num_layers * self.num_heads + 1
def forward(self, features):
last = features
for l in range(self.num_layers):
for hid in range(self.num_heads):
i = l * self.num_heads + hid
# prepare
self.g.set_n_repr(self.prp[i](last))
# message passing
self.g.update_all(gat_message, self.red[i], self.fnl[i], batchable=True)
# merge all the heads
last = torch.cat(
[self.g.pop_n_repr('head%d' % hid) for hid in range(self.num_heads)],
dim=1)
# output projection
self.g.set_n_repr(self.prp[-1](last))
self.g.update_all(gat_message, self.red[-1], self.fnl[-1], batchable=True)
return self.g.pop_n_repr('head0')
def main(args):
# load and preprocess dataset
data = load_data(args)
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
mask = torch.ByteTensor(data.train_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
if args.gpu < 0:
cuda = False
else:
cuda = True
torch.cuda.set_device(args.gpu)
features = features.cuda()
labels = labels.cuda()
mask = mask.cuda()
# create the graph
g = DGLGraph(data.graph)
# create model
model = GAT(g,
args.num_layers,
in_feats,
args.num_hidden,
n_classes,
args.num_heads,
F.elu,
args.in_drop,
args.attn_drop,
args.residual)
if cuda:
model.cuda()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# initialize graph
dur = []
for epoch in range(args.epochs):
if epoch >= 3:
t0 = time.time()
# forward
logits = model(features)
logp = F.log_softmax(logits, 1)
loss = F.nll_loss(logp, labels)
optimizer.zero_grad()
loss.backward()
optimizer.step()
if epoch >= 3:
dur.append(time.time() - t0)
print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch, loss.item(), np.mean(dur), n_edges / np.mean(dur) / 1000))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GAT')
register_data_args(parser)
parser.add_argument("--gpu", type=int, default=-1,
help="Which GPU to use. Set -1 to use CPU.")
parser.add_argument("--epochs", type=int, default=20,
help="number of training epochs")
parser.add_argument("--num-heads", type=int, default=3,
help="number of attentional heads to use")
parser.add_argument("--num-layers", type=int, default=1,
help="number of hidden layers")
parser.add_argument("--num-hidden", type=int, default=8,
help="size of hidden units")
parser.add_argument("--residual", action="store_false",
help="use residual connection")
parser.add_argument("--in-drop", type=float, default=.6,
help="input feature dropout")
parser.add_argument("--attn-drop", type=float, default=.6,
help="attention dropout")
parser.add_argument("--lr", type=float, default=0.005,
help="learning rate")
args = parser.parse_args()
print(args)
main(args)
......@@ -4,43 +4,9 @@ Graph Convolutional Networks (GCN)
Paper link: [https://arxiv.org/abs/1609.02907](https://arxiv.org/abs/1609.02907)
Author's code repo: [https://github.com/tkipf/gcn](https://github.com/tkipf/gcn)
The folder contains three different implementations using DGL.
The folder contains two different implementations using DGL.
Naive GCN (gcn.py)
-------
The model is defined at the finest granularity (i.e., on *one* edge and *one* node).
* The message function `gcn_msg` computes the message for one edge. It simply returns the `h` representation of the source node.
```python
def gcn_msg(src, edge):
# src['h'] is a tensor of shape (D,). D is the feature length.
return src['h']
```
* The reduce function `gcn_reduce` accumulates the incoming messages for one node. The `msgs` argument is a list of all the messages. In GCN, the incoming messages are summed up.
```python
def gcn_reduce(node, msgs):
# msgs is a list of incoming messages.
return sum(msgs)
```
* The update function `NodeUpdateModule` computes the new node representation `h` by applying a non-linear transformation to the reduced messages.
```python
class NodeUpdateModule(nn.Module):
def __init__(self, in_feats, out_feats, activation=None):
super(NodeUpdateModule, self).__init__()
self.linear = nn.Linear(in_feats, out_feats)
self.activation = activation
def forward(self, node, accum):
# accum is a tensor of shape (D,).
h = self.linear(accum)
if self.activation:
h = self.activation(h)
return {'h' : h}
```
After defining the functions on each node/edge, message passing is triggered by calling `update_all` on the DGLGraph object (in the GCN module), as sketched below.
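A minimal sketch of such a module (the `NaiveGCN` class and its arguments are illustrative; `gcn_msg`, `gcn_reduce`, and `NodeUpdateModule` are the functions defined above):
```python
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

class NaiveGCN(nn.Module):
    def __init__(self, nx_graph, in_feats, out_feats):
        super(NaiveGCN, self).__init__()
        self.g = DGLGraph(nx_graph)
        self.layer = NodeUpdateModule(in_feats, out_feats, F.relu)

    def forward(self, features):
        # features: {node_id: tensor of shape (D,)}
        for n, feat in features.items():
            self.g.nodes[n]['h'] = feat
        # send gcn_msg along every edge, aggregate with gcn_reduce,
        # then apply the update module on every node
        self.g.update_all(gcn_msg, gcn_reduce, self.layer)
        return torch.cat([torch.unsqueeze(self.g.nodes[n]['h'], 0)
                          for n in self.g.nodes()])
```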
Batched GCN (gcn_batch.py)
Batched GCN (gcn.py)
-----------
Defining the model on only one node and edge makes it hard to fully utilize GPUs. As a result, we allow users to define the model on a *batch of* nodes and edges, as in the sketch below.
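For instance, the batched message and reduce functions mirror `gcn.py` above; the shapes in the comments assume a batch of B nodes with feature size D and uniform in-degree deg:
```python
import torch

def gcn_msg(src, edge):
    # src is a tensor of shape (B, D): the representations of a whole
    # batch of B source nodes at once, not a single node.
    return src

def gcn_reduce(node, msgs):
    # msgs is a tensor of shape (B, deg, D); a single sum over the degree
    # dimension aggregates the incoming messages of all B nodes.
    return torch.sum(msgs, 1)
```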
......
......@@ -2,6 +2,8 @@
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with batch processing
"""
import argparse
import numpy as np
......@@ -9,14 +11,15 @@ import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
def gcn_msg(src, edge):
return src['h']
return src
def gcn_reduce(node, msgs):
return {'h' : sum(msgs)}
return torch.sum(msgs, 1)
class NodeApplyModule(nn.Module):
def __init__(self, in_feats, out_feats, activation=None):
......@@ -25,14 +28,14 @@ class NodeApplyModule(nn.Module):
self.activation = activation
def forward(self, node):
h = self.linear(node['h'])
h = self.linear(node)
if self.activation:
h = self.activation(h)
return {'h' : h}
return h
class GCN(nn.Module):
def __init__(self,
nx_graph,
g,
in_feats,
n_hidden,
n_classes,
......@@ -40,7 +43,7 @@ class GCN(nn.Module):
activation,
dropout):
super(GCN, self).__init__()
self.g = DGLGraph(nx_graph)
self.g = g
self.dropout = dropout
# input layer
self.layers = nn.ModuleList([NodeApplyModule(in_feats, n_hidden, activation)])
......@@ -50,31 +53,24 @@ class GCN(nn.Module):
# output layer
self.layers.append(NodeApplyModule(n_hidden, n_classes))
def forward(self, features, train_nodes):
for n, feat in features.items():
self.g.nodes[n]['h'] = feat
def forward(self, features):
self.g.set_n_repr(features)
for layer in self.layers:
# apply dropout
if self.dropout:
self.g.nodes[n]['h'] = F.dropout(g.nodes[n]['h'], p=self.dropout)
val = F.dropout(self.g.get_n_repr(), p=self.dropout)
self.g.set_n_repr(val)
self.g.update_all(gcn_msg, gcn_reduce, layer)
return torch.cat([torch.unsqueeze(self.g.nodes[n]['h'], 0) for n in train_nodes])
return self.g.pop_n_repr()
def main(args):
# load and preprocess dataset
data = load_data(args)
# features of each samples
features = {}
labels = []
train_nodes = []
for n in data.graph.nodes():
features[n] = torch.FloatTensor(data.features[n, :])
if data.train_mask[n] == 1:
train_nodes.append(n)
labels.append(data.labels[n])
labels = torch.LongTensor(labels)
in_feats = data.features.shape[1]
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
mask = torch.ByteTensor(data.train_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
......@@ -83,11 +79,13 @@ def main(args):
else:
cuda = True
torch.cuda.set_device(args.gpu)
features = {k : v.cuda() for k, v in features.items()}
features = features.cuda()
labels = labels.cuda()
mask = mask.cuda()
# create GCN model
model = GCN(data.graph,
g = DGLGraph(data.graph)
model = GCN(g,
in_feats,
args.n_hidden,
n_classes,
......@@ -107,9 +105,9 @@ def main(args):
if epoch >= 3:
t0 = time.time()
# forward
logits = model(features, train_nodes)
logits = model(features)
logp = F.log_softmax(logits, 1)
loss = F.nll_loss(logp, labels)
loss = F.nll_loss(logp[mask], labels[mask])
optimizer.zero_grad()
loss.backward()
......@@ -130,7 +128,7 @@ if __name__ == '__main__':
help="gpu")
parser.add_argument("--lr", type=float, default=1e-3,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=10,
parser.add_argument("--n-epochs", type=int, default=20,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
......
"""
Semi-Supervised Classification with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1609.02907
Code: https://github.com/tkipf/gcn
GCN with batch processing
"""
import argparse
import numpy as np
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
def gcn_msg(src, edge):
return src
def gcn_reduce(node, msgs):
return torch.sum(msgs, 1)
class NodeApplyModule(nn.Module):
def __init__(self, in_feats, out_feats, activation=None):
super(NodeApplyModule, self).__init__()
self.linear = nn.Linear(in_feats, out_feats)
self.activation = activation
def forward(self, node):
h = self.linear(node)
if self.activation:
h = self.activation(h)
return h
class GCN(nn.Module):
def __init__(self,
g,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout):
super(GCN, self).__init__()
self.g = g
self.dropout = dropout
# input layer
self.layers = nn.ModuleList([NodeApplyModule(in_feats, n_hidden, activation)])
# hidden layers
for i in range(n_layers - 1):
self.layers.append(NodeApplyModule(n_hidden, n_hidden, activation))
# output layer
self.layers.append(NodeApplyModule(n_hidden, n_classes))
def forward(self, features):
self.g.set_n_repr(features)
for layer in self.layers:
# apply dropout
if self.dropout:
val = F.dropout(self.g.get_n_repr(), p=self.dropout)
self.g.set_n_repr(val)
self.g.update_all(gcn_msg, gcn_reduce, layer, batchable=True)
return self.g.pop_n_repr()
def main(args):
# load and preprocess dataset
data = load_data(args)
features = torch.FloatTensor(data.features)
labels = torch.LongTensor(data.labels)
mask = torch.ByteTensor(data.train_mask)
in_feats = features.shape[1]
n_classes = data.num_labels
n_edges = data.graph.number_of_edges()
if args.gpu < 0:
cuda = False
else:
cuda = True
torch.cuda.set_device(args.gpu)
features = features.cuda()
labels = labels.cuda()
mask = mask.cuda()
# create GCN model
g = DGLGraph(data.graph)
model = GCN(g,
in_feats,
args.n_hidden,
n_classes,
args.n_layers,
F.relu,
args.dropout)
if cuda:
model.cuda()
# use optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
# initialize graph
dur = []
for epoch in range(args.n_epochs):
if epoch >= 3:
t0 = time.time()
# forward
logits = model(features)
logp = F.log_softmax(logits, 1)
loss = F.nll_loss(logp[mask], labels[mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()
if epoch >= 3:
dur.append(time.time() - t0)
print("Epoch {:05d} | Loss {:.4f} | Time(s) {:.4f} | ETputs(KTEPS) {:.2f}".format(
epoch, loss.item(), np.mean(dur), n_edges / np.mean(dur) / 1000))
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='GCN')
register_data_args(parser)
parser.add_argument("--dropout", type=float, default=0,
help="dropout probability")
parser.add_argument("--gpu", type=int, default=-1,
help="gpu")
parser.add_argument("--lr", type=float, default=1e-3,
help="learning rate")
parser.add_argument("--n-epochs", type=int, default=20,
help="number of training epochs")
parser.add_argument("--n-hidden", type=int, default=16,
help="number of hidden gcn units")
parser.add_argument("--n-layers", type=int, default=1,
help="number of hidden gcn layers")
args = parser.parse_args()
print(args)
main(args)
......@@ -55,7 +55,7 @@ class GCN(nn.Module):
if self.dropout:
val = F.dropout(self.g.get_n_repr(), p=self.dropout)
self.g.set_n_repr(val)
self.g.update_all(fn.copy_src(), fn.sum(), layer, batchable=True)
self.g.update_all(fn.copy_src(), fn.sum(), layer)
return self.g.pop_n_repr()
def main(args):
......
......@@ -10,10 +10,16 @@ class GraphOp {
public:
/*!
* \brief Return the line graph.
*
* If i~j and j~i are two edges in the original graph G, then
* (i,j)~(j,i) and (j,i)~(i,j) are the "backtracking" edges in
* the line graph.
*
* \param graph The input graph.
* \param backtracking Whether to include the backtracking edges.
* \return the line graph
*/
static Graph LineGraph(const Graph* graph);
static Graph LineGraph(const Graph* graph, bool backtracking);
/*!
* \brief Return a disjoint union of the input graphs.
*
......
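To make the backtracking notion concrete, here is a sketch using plain networkx (an illustration only, not the C++ API above):
```python
# For edges (0,1), (1,0) and (1,2), the line graph connects (i,j) to (j,k).
# The pairs (0,1)->(1,0) and (1,0)->(0,1) "backtrack" to their source node;
# passing backtracking=false to LineGraph would drop exactly these edges.
import networkx as nx

g = nx.DiGraph([(0, 1), (1, 0), (1, 2)])
lg = nx.line_graph(g)  # nodes of lg are the edges of g
non_backtracking = [(e1, e2) for e1, e2 in lg.edges()
                    if not (e1[0] == e2[1] and e1[1] == e2[0])]
print(sorted(lg.edges()))  # [((0,1),(1,0)), ((0,1),(1,2)), ((1,0),(0,1))]
print(non_backtracking)    # [((0, 1), (1, 2))]
```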
......@@ -9,7 +9,7 @@ from ._ffi.function import register_func, get_global_func, list_global_func_name
from ._ffi.base import DGLError, __version__
from .base import ALL
from .batch import batch, unbatch
from .batched_graph import *
from .generator import *
from .graph import DGLGraph, __MSG__, __REPR__
from .subgraph import DGLSubGraph
......@@ -10,6 +10,8 @@ from . import graph_index as gi
from . import backend as F
from . import utils
__all__ = ['BatchedDGLGraph', 'batch', 'unbatch', 'split']
class BatchedDGLGraph(DGLGraph):
"""The batched DGL graph.
......@@ -74,15 +76,15 @@ class BatchedDGLGraph(DGLGraph):
# override APIs
def add_nodes(self, num, reprs=None):
"""Add nodes."""
"""Add nodes. Disabled because BatchedDGLGraph is read-only."""
raise RuntimeError('Readonly graph. Mutation is not allowed.')
def add_edge(self, u, v, reprs=None):
"""Add one edge."""
"""Add one edge. Disabled because BatchedDGLGraph is read-only."""
raise RuntimeError('Readonly graph. Mutation is not allowed.')
def add_edges(self, u, v, reprs=None):
"""Add many edges."""
"""Add many edges. Disabled because BatchedDGLGraph is read-only."""
raise RuntimeError('Readonly graph. Mutation is not allowed.')
# new APIs
......@@ -96,6 +98,25 @@ class BatchedDGLGraph(DGLGraph):
# TODO
pass
def readout(self, reduce_func):
"""Perform readout for each graph in the batch.
The readout value is a tensor of shape (B, D1, D2, ...) where B is the
batch size.
Parameters
----------
reduce_func : callable
The reduce function for readout.
Returns
-------
dict of tensors
The readout values.
"""
# TODO
pass
'''
def query_new_node(self, g, u):
idx = self.graph_idx[g]
......@@ -127,7 +148,7 @@ def split(graph_batch, num_or_size_splits):
pass
def unbatch(graph):
"""Unbatch the graph and return a list of subgraphs.
"""Unbatch and return the list of graphs in this batch.
Parameters
----------
......@@ -143,12 +164,10 @@ def unbatch(graph):
node_frames = [FrameRef() for i in range(bsize)]
edge_frames = [FrameRef() for i in range(bsize)]
for attr, col in graph._node_frame.items():
# TODO: device context
col_splits = F.unpack(col, bn)
for i in range(bsize):
node_frames[i][attr] = col_splits[i]
for attr, col in graph._edge_frame.items():
# TODO: device context
col_splits = F.unpack(col, be)
for i in range(bsize):
edge_frames[i][attr] = col_splits[i]
......
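A hypothetical usage sketch of the batch/unbatch pair (the node and edge counts are illustrative; mutation uses the DGLGraph methods whose read-only overrides are shown above):
```python
import dgl

g1 = dgl.DGLGraph()
g1.add_nodes(2)
g1.add_edge(0, 1)

g2 = dgl.DGLGraph()
g2.add_nodes(3)
g2.add_edges([0, 1], [1, 2])

bg = dgl.batch([g1, g2])    # a read-only BatchedDGLGraph with 5 nodes
g1_, g2_ = dgl.unbatch(bg)  # split back into the original graphs
```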
......@@ -79,22 +79,3 @@ class SST(object):
@property
def num_vocabs(self):
return len(self.vocab)
@staticmethod
def batcher(batch):
nid_with_word = []
wordid = []
label = []
gnid = 0
for tree in batch:
for nid in range(tree.number_of_nodes()):
if tree.nodes[nid]['x'] != SST.PAD_WORD:
nid_with_word.append(gnid)
wordid.append(tree.nodes[nid]['x'])
label.append(tree.nodes[nid]['y'])
gnid += 1
batch_trees = dgl.batch(batch)
return SSTBatch(graph=batch_trees,
nid_with_word=F.tensor(nid_with_word, dtype=F.int64),
wordid=F.tensor(wordid, dtype=F.int64),
label=F.tensor(label, dtype=F.int64))
......@@ -141,7 +141,7 @@ class FrameRef(MutableMapping):
else:
self.update_rows(key, val)
def add_column(self, name, col):
def add_column(self, name, col, inplace=False):
shp = F.shape(col)
if self.is_span_whole_column():
if self.num_columns == 0:
......@@ -157,18 +157,25 @@ class FrameRef(MutableMapping):
fcol = F.zeros((self._frame.num_rows,) + shp[1:])
fcol = F.to_context(fcol, colctx)
idx = self.index().tousertensor(colctx)
newfcol = F.scatter_row(fcol, idx, col)
self._frame[name] = newfcol
if inplace:
self._frame[name] = fcol
self._frame[name][idx] = col
else:
newfcol = F.scatter_row(fcol, idx, col)
self._frame[name] = newfcol
def update_rows(self, query, other):
def update_rows(self, query, other, inplace=False):
rowids = self._getrowid(query)
for key, col in other.items():
if key not in self:
# add new column
tmpref = FrameRef(self._frame, rowids)
tmpref.add_column(key, col)
tmpref.add_column(key, col, inplace)
idx = rowids.tousertensor(F.get_context(self._frame[key]))
self._frame[key] = F.scatter_row(self._frame[key], idx, col)
if inplace:
self._frame[key][idx] = col
else:
self._frame[key] = F.scatter_row(self._frame[key], idx, col)
def __delitem__(self, key):
if isinstance(key, str):
......
......@@ -32,15 +32,11 @@ class DGLGraph(object):
Node feature storage.
edge_frame : FrameRef
Edge feature storage.
attr : keyword arguments, optional
Attributes to add to graph as key=value pairs.
"""
def __init__(self,
graph_data=None,
node_frame=None,
edge_frame=None,
**attr):
# TODO: keyword attr
edge_frame=None):
# graph
self._graph = create_graph_index(graph_data)
# frame
......@@ -502,7 +498,7 @@ class DGLGraph(object):
"""
return self._edge_frame.schemes
def set_n_repr(self, hu, u=ALL):
def set_n_repr(self, hu, u=ALL, inplace=False):
"""Set node(s) representation.
To set multiple node representations at once, pass `u` with a tensor or
......@@ -540,9 +536,9 @@ class DGLGraph(object):
self._node_frame[__REPR__] = hu
else:
if utils.is_dict_like(hu):
self._node_frame[u] = hu
self._node_frame.update_rows(u, hu, inplace=inplace)
else:
self._node_frame[u] = {__REPR__ : hu}
self._node_frame.update_rows(u, {__REPR__ : hu}, inplace=inplace)
def get_n_repr(self, u=ALL):
"""Get node(s) representation.
......@@ -842,7 +838,7 @@ class DGLGraph(object):
def _batch_send(self, u, v, message_func):
if is_all(u) and is_all(v):
u, v, _ = self._graph.edges()
self._msg_graph.add_edges(u, v)
self._msg_graph.add_edges(u, v) # TODO(minjie): can be optimized
# call UDF
src_reprs = self.get_n_repr(u)
edge_reprs = self.get_e_repr()
......@@ -1144,30 +1140,28 @@ class DGLGraph(object):
self.apply_nodes(ALL, apply_node_func)
def propagate(self,
iterator='bfs',
traverser='topo',
message_func="default",
reduce_func="default",
apply_node_func="default",
**kwargs):
"""Propagate messages and update nodes using iterator.
"""Propagate messages and update nodes using graph traversal.
A convenient function for passing messages and updating
nodes according to the iterator. The iterator can be
any of the pre-defined iterators ('bfs', 'dfs', 'pre-order',
'mid-order', 'post-order'). The computation will be unrolled
in the backend efficiently. User can also provide custom
iterator that generates the edges and nodes.
nodes according to the traverser. The traverser can be
any of the pre-defined traversers (e.g. 'topo'). Users can also provide a custom
traverser that generates the edges and nodes.
Parameters
----------
traverser : str or generator of edges.
The traverser of the graph.
message_func : str or callable
The message function.
reduce_func : str or callable
The reduce function.
apply_node_func : str or callable
The update function.
iterator : str or generator of steps.
The iterator of the graph.
kwargs : keyword arguments, optional
Arguments for the pre-defined traversers.
"""
......@@ -1197,7 +1191,9 @@ class DGLGraph(object):
G : DGLSubGraph
The subgraph.
"""
return dgl.DGLSubGraph(self, nodes)
induced_nodes = utils.toindex(nodes)
gi, induced_edges = self._graph.node_subgraph(induced_nodes)
return dgl.DGLSubGraph(self, induced_nodes, induced_edges, gi)
def merge(self, subgraphs, reduce_func='sum'):
"""Merge subgraph features back to this parent graph.
......
"""Utility functions for networkx adapter."""
from __future__ import absolute_import
from collections import MutableMapping
import networkx as nx
import networkx.convert as convert
class NodeDict(MutableMapping):
def __init__(self, add_cb, del_cb):
self._dict = {}
self._add_cb = add_cb
self._del_cb = del_cb
def __setitem__(self, key, val):
self._add_cb(key)
self._dict[key] = val
def __getitem__(self, key):
return self._dict[key]
def __delitem__(self, key):
self._del_cb(key)
del self._dict[key]
def __len__(self):
return len(self._dict)
def __iter__(self):
return iter(self._dict)
class AdjOuterDict(MutableMapping):
def __init__(self, add_cb, del_cb):
self._dict = {}
self._add_cb = add_cb
self._del_cb = del_cb
def __setitem__(self, key, val):
val.src = key
self._dict[key] = val
def __getitem__(self, key):
return self._dict[key]
def __delitem__(self, key):
for val in self._dict[key]:
self._del_cb(key, val)
del self._dict[key]
def __len__(self):
return len(self._dict)
def __iter__(self):
return iter(self._dict)
class AdjInnerDict(MutableMapping):
def __init__(self, add_cb, del_cb):
self._dict = {}
self.src = None
self._add_cb = add_cb
self._del_cb = del_cb
def __setitem__(self, key, val):
if self.src is not None and key not in self._dict:
self._add_cb(self.src, key)
self._dict[key] = val
def __getitem__(self, key):
return self._dict[key]
def __delitem__(self, key):
if self.src is not None:
self._del_cb(self.src, key)
del self._dict[key]
def __len__(self):
return len(self._dict)
def __iter__(self):
return iter(self._dict)
class AdjInnerDictFactory(object):
def __init__(self, cb1, cb2):
self._cb1 = cb1
self._cb2 = cb2
def __call__(self):
return AdjInnerDict(self._cb1, self._cb2)
def nx_init(obj,
add_node_cb,
add_edge_cb,
del_node_cb,
del_edge_cb,
graph_data,
**attr):
"""Init the object to be compatible with networkx's DiGraph.
Parameters
----------
obj : any
The object to initialize.
add_node_cb : callable
The callback function invoked when a node is added.
add_edge_cb : callable
The callback function invoked when an edge is added.
del_node_cb : callable
The callback function invoked when a node is deleted.
del_edge_cb : callable
The callback function invoked when an edge is deleted.
graph_data : graph data
Data to initialize graph. Same as networkx's semantics.
attr : keyword arguments, optional
Attributes to add to graph as key=value pairs.
"""
# The following code works for networkx 2.1.
obj.adjlist_outer_dict_factory = None
obj.adjlist_inner_dict_factory = AdjInnerDictFactory(add_edge_cb, del_edge_cb)
obj.edge_attr_dict_factory = dict
obj.root_graph = obj
obj.graph = {}
obj._node = NodeDict(add_node_cb, del_node_cb)
obj._adj = AdjOuterDict(add_edge_cb, del_edge_cb)
obj._pred = dict()
obj._succ = obj._adj
if graph_data is not None:
convert.to_networkx_graph(graph_data, create_using=obj)
obj.graph.update(attr)
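A hypothetical usage sketch (the subclass and print callbacks are illustrative only): any object initialized through nx_init behaves like a networkx DiGraph whose mutations fire the registered callbacks.
```python
import networkx as nx

# Illustrative subclass: mirrors how a graph type could hook mutation events.
class WatchedDiGraph(nx.DiGraph):
    def __init__(self, graph_data=None):
        nx_init(self,
                add_node_cb=lambda n: print('added node', n),
                add_edge_cb=lambda u, v: print('added edge', u, v),
                del_node_cb=lambda n: print('deleted node', n),
                del_edge_cb=lambda u, v: print('deleted edge', u, v),
                graph_data=graph_data)

g = WatchedDiGraph()
g.add_edge(0, 1)  # fires the node callbacks for 0 and 1, then the edge callback
```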
......@@ -17,7 +17,6 @@ setuptools.setup(
'numpy>=1.14.0',
'scipy>=1.1.0',
'networkx>=2.1',
'python-igraph>=0.7.0',
],
data_files=[('', ['VERSION'])],
url='https://github.com/jermainewang/dgl-1')
url='https://github.com/jermainewang/dgl')