Unverified Commit be444e52 authored by Mufei Li, committed by GitHub

[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)



* Update graph

* Fix for dgl.graph

* from_scipy

* Replace canonical_etypes with relations

* from_networkx

* Update for hetero_from_relations

* Roll back the change of canonical_etypes to relations

* heterograph

* bipartite

* Update doc

* Fix lint

* Fix test cases

* Use DGLError

* rewrite sanity checks

* delete unnecessary checks

Co-authored-by: xiang song (charlie.song) <classicxsong@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 0afc3cf8
......@@ -66,7 +66,6 @@ def main(args):
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
......@@ -98,7 +97,7 @@ def main(args):
category_id = i
# edge type and normalization factor
g = dgl.to_homo(hg)
g = dgl.to_homogeneous(hg, edata=['norm'])
# check cuda
if args.gpu < 0:
......
......@@ -89,15 +89,15 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
*
* \note Example: consider the following graph:
* \note Example: consider a graph with the following edges
*
* g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
* [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)]
*
* Then ToSimpleGraph(g) would yield the following elements:
*
* * The first element would be the simple graph itself:
* * The first element would be the simple graph itself with the following edges
*
* simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
* [(0, 1), (1, 3), (1, 4), (2, 2)]
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
......
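For reference, the Python-level counterpart of the example in the doc comment above would look roughly like the sketch below. It assumes `dgl.to_simple` with a `return_counts` argument that records per-edge multiplicities in the returned graph's edge data; treat the exact keyword as an assumption rather than a guarantee.

>>> import dgl
>>> import torch as th
>>> # The multigraph from the doc comment: duplicate (1, 3) and (1, 4) edges.
>>> g = dgl.graph((th.tensor([0, 1, 2, 1, 1, 1]), th.tensor([1, 3, 2, 3, 4, 4])))
>>> # Deduplicate parallel edges; 'count' stores how many original edges each kept edge represents.
>>> sg = dgl.to_simple(g, return_counts='count')
>>> sg.edata['count']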
......@@ -4038,6 +4038,7 @@ class DGLGraph(DGLBaseGraph):
self._node_frame = old_nframe
self._edge_frame = old_eframe
@property
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return True
......
......@@ -397,7 +397,7 @@ def unbatch(g, node_split=None, edge_split=None):
for i in range(num_split)]
# Create graphs
gs = [convert.heterograph(edge_dict, num_nodes_dict, validate=True, idtype=g.idtype)
gs = [convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)]
# Unbatch node features
......
This diff is collapsed.
......@@ -19,7 +19,7 @@ from .. import convert
from .. import batch
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import to_networkx
from ..convert import from_networkx, to_networkx
backend = os.environ.get('DGLBACKEND', 'pytorch')
......@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset):
test_mask = _sample_mask(idx_test, labels.shape[0])
self._graph = graph
g = dgl_graph(graph)
g = from_networkx(graph)
g.ndata['train_mask'] = generate_mask_tensor(train_mask)
g.ndata['val_mask'] = generate_mask_tensor(val_mask)
......@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset):
for line in f.readlines():
if line.startswith('graph'):
if len(elist) != 0:
self.graphs.append(dgl_graph(elist))
self.graphs.append(dgl_graph(tuple(zip(*elist))))
elist = []
else:
u, v = line.strip().split(' ')
elist.append((int(u), int(v)))
if len(elist) != 0:
self.graphs.append(dgl_graph(elist))
self.graphs.append(dgl_graph(tuple(zip(*elist))))
with open("{}/pmpds.pkl".format(root), 'rb') as f:
self.pmpds = _pickle_load(f)
self.labels = []
......
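The `tuple(zip(*elist))` pattern above converts an edge list of `(u, v)` pairs into the `(src, dst)` pair-of-sequences form that the graph constructor now expects; a minimal illustration:

>>> import dgl
>>> elist = [(0, 1), (1, 2), (2, 0)]
>>> src, dst = tuple(zip(*elist))  # src = (0, 1, 2), dst = (1, 2, 0)
>>> g = dgl.graph((src, dst))      # dgl.graph is what the file above imports as dgl_graph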
......@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset):
self.labels.append(self.glabel_dict[glabel])
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(n_nodes)
nlabels = [] # node labels
......
......@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None):
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3])
>>> g2 = dgl.graph(([0, 2], [2, 3])
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata["e"] = th.ones(2, 4)
Save Graphs into file
......
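To round out the corrected docstring example above, saving and reloading the two graphs would look roughly as follows (a sketch; the file name is arbitrary):

>>> import dgl
>>> import torch as th
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata['e'] = th.ones(2, 4)
>>> dgl.save_graphs('./two_graphs.bin', [g1, g2])
>>> graphs, label_dict = dgl.load_graphs('./two_graphs.bin')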
......@@ -6,7 +6,7 @@ import networkx as nx
from .. import backend as F
from .dgl_dataset import DGLDataset
from .utils import deprecate_property
from ..convert import graph as dgl_graph
from ..convert import from_networkx
__all__ = ['KarateClubDataset', 'KarateClub']
......@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset):
label = np.asarray(
[kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
label = F.tensor(label)
g = dgl_graph(kc_graph)
g = from_networkx(kc_graph)
g.ndata['label'] = label
self._graph = g
self._data = [g]
......
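The conversion the dataset now relies on can be reproduced standalone; a small sketch using the NetworkX built-in karate club graph:

>>> import networkx as nx
>>> import dgl
>>> kc_graph = nx.karate_club_graph()
>>> g = dgl.from_networkx(kc_graph)
>>> g.num_nodes()  # 34 club members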
......@@ -6,7 +6,7 @@ import numpy as np
from .dgl_dataset import DGLDataset
from .utils import save_graphs, load_graphs, makedirs
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import from_networkx
from ..transform import add_self_loop
__all__ = ['MiniGCDataset']
......@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset):
# preprocess
for i in range(self.num_graphs):
# convert to DGLGraph, and add self loops
self.graphs[i] = add_self_loop(dgl_graph(self.graphs[i]))
self.graphs[i] = add_self_loop(from_networkx(self.graphs[i]))
self.labels = F.tensor(np.array(self.labels).astype(np.int))
def _gen_cycle(self, n):
......
......@@ -300,10 +300,10 @@ class RDFGraphDataset(DGLBuiltinDataset):
# convert to heterograph
if self.verbose:
print('Convert to heterograph ...')
hg = dgl.to_hetero(g,
ntypes,
etypes,
metagraph=mg)
hg = dgl.to_heterogeneous(g,
ntypes,
etypes,
metagraph=mg)
if self.verbose:
print('#Node types:', len(hg.ntypes))
print('#Canonical edge types:', len(hg.etypes))
......
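The rename from `dgl.to_hetero` to `dgl.to_heterogeneous` mirrors `dgl.to_homogeneous`; a minimal round-trip sketch with toy node and edge type names:

>>> import dgl
>>> hg = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2]),
...                       ('user', 'plays', 'game'): ([0, 1], [0, 1])})
>>> g = dgl.to_homogeneous(hg)                           # type info kept in g.ndata / g.edata
>>> hg2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)  # recover the typed view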
......@@ -8,7 +8,7 @@ import os
from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import from_scipy
class RedditDataset(DGLBuiltinDataset):
......@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset):
# graph
coo_adj = sp.load_npz(os.path.join(
self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str)))
self._graph = dgl_graph(coo_adj)
self._graph = from_scipy(coo_adj)
# features and labels
reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
features = reddit_data["feature"]
......
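`dgl.from_scipy` builds a graph directly from a SciPy sparse adjacency matrix; a toy sketch with a random matrix standing in for the Reddit adjacency:

>>> import scipy.sparse as sp
>>> import dgl
>>> coo_adj = sp.random(100, 100, density=0.05, format='coo')
>>> g = dgl.from_scipy(coo_adj)
>>> g.num_nodes(), g.num_edges()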
......@@ -8,7 +8,7 @@ import numpy.random as npr
import scipy as sp
from .dgl_dataset import DGLDataset
from ..convert import graph as dgl_graph
from ..convert import from_scipy
from .. import batch
from .utils import save_info, save_graphs, load_info, load_graphs
......@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset):
pq = [generator() for _ in range(self._n_graphs)]
else:
raise RuntimeError()
self._graphs = [dgl_graph(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._graphs = [from_scipy(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs]
in_degrees = lambda g: g.in_degrees().float()
self._graph_degrees = [in_degrees(g) for g in self._graphs]
......
......@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
np.genfromtxt(self._file_path("graph_labels"), dtype=int))
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int))
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......
......@@ -596,8 +596,11 @@ class EdgeCollator(Collator):
'graph has multiple or no edge types; '\
'please return a dict in negative sampler.'
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
# Get dtype from a tuple of tensors
dtype = F.dtype(list(neg_srcdst.values())[0][0])
neg_edges = {
etype: neg_srcdst.get(etype, []) for etype in self.g.canonical_etypes}
etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
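The fallback added above fills in empty source/destination tensors for any canonical edge type the negative sampler did not return, so `heterograph` still sees every relation. Expressed with plain PyTorch tensors instead of the backend wrappers (toy type names, purely illustrative):

>>> import torch as th
>>> canonical_etypes = [('user', 'follows', 'user'), ('user', 'plays', 'game')]
>>> neg_srcdst = {('user', 'follows', 'user'): (th.tensor([0, 1]), th.tensor([2, 2]))}
>>> dtype = list(neg_srcdst.values())[0][0].dtype
>>> neg_edges = {etype: neg_srcdst.get(etype, (th.tensor([], dtype=dtype), th.tensor([], dtype=dtype)))
...              for etype in canonical_etypes}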
......@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols),
num_nodes=num_nodes, validate=False,
formats=formats,
num_nodes=num_nodes,
idtype=idtype, device=device)
return g
return g.formats(formats)
def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and
......@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
Parameters
----------
utype : str, optional
The name of the source node type.
etype : str, optional
The name of the edge type.
vtype : str, optional
The name of the destination node type.
num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
num_dst_nodes : int
......@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
g = convert.bipartite((rows, cols),
num_nodes=(num_src_nodes, num_dst_nodes), validate=False,
idtype=idtype, device=device,
formats=formats)
return g
g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
return g.formats(formats)
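With the new signature, the source, edge, and destination type names come first; a usage sketch with made-up type names:

>>> import dgl
>>> g = dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
>>> g.num_nodes('user'), g.num_nodes('game'), g.num_edges()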
This diff is collapsed.
......@@ -547,6 +547,9 @@ class HeteroGraphIndex(ObjectBase):
"""
if order is None:
order = ""
elif order not in ['srcdst', 'eid']:
raise DGLError("Expect order to be one of None, 'srcdst', 'eid', "
"got {}".format(order))
edge_array = _CAPI_DGLHeteroEdges(self, int(etype), order)
src = F.from_dgl_nd(edge_array(0))
dst = F.from_dgl_nd(edge_array(1))
......
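At the Python level, this check backs the `order` argument of `DGLGraph.edges`, which accepts `None`, `'srcdst'`, or `'eid'`; anything else now raises `DGLError`. A quick sketch:

>>> import dgl
>>> g = dgl.graph(([0, 2, 1], [1, 0, 2]))
>>> g.edges(order='eid')     # edges in edge ID (insertion) order
>>> g.edges(order='srcdst')  # edges sorted by source, then destination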
......@@ -76,8 +76,9 @@ class GatedGraphConv(nn.Block):
is the output feature size.
"""
with graph.local_scope():
assert graph.is_homogeneous(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
assert graph.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]),
ctx=feat.context)
feat = nd.concat(feat, zero_pad, dim=-1)
......
......@@ -229,8 +229,9 @@ class RelGraphConv(gluon.Block):
mx.ndarray.NDArray
New node features.
"""
assert g.is_homogeneous(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
assert g.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
with g.local_scope():
g.ndata['h'] = x
g.edata['type'] = etypes
......
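The updated assertion messages point users at `dgl.to_homogeneous`; a sketch of the conversion they suggest, including the per-edge type IDs that would then be passed as the edge type argument (toy graph, illustrative only):

>>> import dgl
>>> hg = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2]),
...                       ('user', 'plays', 'game'): ([0, 1], [0, 1])})
>>> g = dgl.to_homogeneous(hg)
>>> etypes = g.edata[dgl.ETYPE]  # integer edge-type IDs, one per edge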