Unverified Commit acd21a6d authored by Quan (Andy) Gan, committed by GitHub

[Feature] Support direct creation from CSR and CSC (#3045)



* csr and csc creation

* fix

* fix

* fixes to adj transpose

* fine

* raise error if indptr did not match number of nodes

* fix

* huh?

* oh
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
parent 2f7ca414
......@@ -110,7 +110,7 @@ class DiffPoolBatchedGraphLayer(nn.Module):
assign_tensor = torch.block_diag(*assign_tensor) # size = (sum_N, batch_size * N_a)
h = torch.matmul(torch.t(assign_tensor), feat)
adj = g.adjacency_matrix(transpose=False, ctx=device)
adj = g.adjacency_matrix(transpose=True, ctx=device)
adj_new = torch.sparse.mm(adj, assign_tensor)
adj_new = torch.mm(torch.t(assign_tensor), adj_new)
......
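
The call-site change above reflects the central semantic change of this PR: under the new convention, ``adjacency_matrix``/``adj`` with ``transpose=False`` (now the default) returns the source-by-destination matrix, and ``transpose=True`` returns the destination-by-source matrix used for SpMM-style aggregation, which is why the explicit ``transpose`` arguments throughout this diff are flipped. A minimal sketch of the new convention, assuming this patch and a PyTorch backend (graph and values are illustrative only):

import dgl
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 3]))                # edges 0->1, 1->2, 2->3
A = g.adjacency_matrix()                              # src-by-dst (new default, transpose=False)
At = g.adjacency_matrix(transpose=True)               # dst-by-src, used for feature aggregation
assert torch.equal(A.to_dense().t(), At.to_dense())   # the two orientations are transposes
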
......@@ -393,7 +393,7 @@ class CSRMM(mx.autograd.Function):
def forward(self, A_weights, B_weights):
gidxC, C_weights = _csrmm(self.gidxA, A_weights, self.gidxB, B_weights, self.num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
self.backward_cache = gidxC
......@@ -430,7 +430,7 @@ class CSRSum(mx.autograd.Function):
def forward(self, *weights):
gidxC, C_weights = _csrsum(self.gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
0, True, 'csr')
0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
self.backward_cache = gidxC
......
......@@ -310,7 +310,7 @@ class CSRMM(th.autograd.Function):
@staticmethod
def forward(ctx, gidxA, A_weights, gidxB, B_weights, num_vtypes):
gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
ctx.backward_cache = gidxA, gidxB, gidxC
......@@ -337,7 +337,7 @@ class CSRSum(th.autograd.Function):
# PyTorch tensors must be explicit arguments of the forward function
gidxC, C_weights = _csrsum(gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
0, True, 'csr')
0, False, 'csr')
# Note: the returned C_indptr, C_indices and C_eids tensors MUST be the same
# as the underlying tensors of the created graph gidxC.
ctx.backward_cache = gidxs, gidxC
......
......@@ -302,7 +302,7 @@ def scatter_add(x, idx, m):
def csrmm_real(gidxA, A_weights, gidxB, B_weights, num_vtypes):
gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
# Only the last argument is meaningful.
......@@ -328,7 +328,7 @@ def csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes):
def csrsum_real(gidxs, weights):
gidxC, C_weights = _csrsum(gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, True, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
# Only the last argument is meaningful.
......
......@@ -53,8 +53,18 @@ def graph(data,
DGL calls this format "tuple of node-tensors". The tensors should have the same
data type (int32 or int64) and device context (see the descriptions of
:attr:`idtype` and :attr:`device` below).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
of the graph's adjacency matrix. The first one is the row index pointer. The
second one is the column indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
of the graph's adjacency matrix. The first one is the column index pointer. The
second one is the row indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
The tensors can be replaced with any iterable of integers (e.g. list, tuple,
numpy.ndarray).
ntype : str, optional
Deprecated. To construct a graph with named node types, use :func:`dgl.heterograph`.
etype : str, optional
......@@ -131,6 +141,14 @@ def graph(data,
>>> g = dgl.graph((src_ids, dst_ids), idtype=torch.int32, device='cuda:0')
Create a graph from its CSR representation:
>>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [])))
Create the same graph, additionally specifying the edge IDs:
>>> g = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [0, 1, 2])))
See Also
--------
from_scipy
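
To make the docstring example above concrete, here is a small hedged sketch (assuming this patch and a PyTorch backend) checking that the CSR triplet ``([0, 0, 0, 1, 2, 3], [1, 2, 3], [])`` describes the same 5-node graph as the COO pair ``([2, 3, 4], [1, 2, 3])``:

import dgl
import torch

g_csr = dgl.graph(('csr', ([0, 0, 0, 1, 2, 3], [1, 2, 3], [])))
g_coo = dgl.graph((torch.tensor([2, 3, 4]), torch.tensor([1, 2, 3])))
assert g_csr.num_nodes() == g_coo.num_nodes() == 5
src, dst = g_csr.edges()
print(src.tolist(), dst.tolist())   # [2, 3, 4] [1, 2, 3]
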
......@@ -158,16 +176,15 @@ def graph(data,
" Please refer to their API documents for more details.".format(
deprecated_kwargs.keys()))
u, v, urange, vrange = utils.graphdata2tensors(data, idtype)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(data, idtype)
if num_nodes is not None: # override the number of nodes
if num_nodes < max(urange, vrange):
raise DGLError('The num_nodes argument must be larger than the max ID in the data,'
' but got {} and {}.'.format(num_nodes, max(urange, vrange) - 1))
urange, vrange = num_nodes, num_nodes
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange,
row_sorted=row_sorted, col_sorted=col_sorted,
validate=False)
g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange,
row_sorted=row_sorted, col_sorted=col_sorted)
return g.to(device)
......@@ -226,8 +243,18 @@ def heterograph(data_dict,
this format "tuple of node-tensors". The tensors should have the same data type,
which must be either int32 or int64. They should also have the same device context
(see below the descriptions of :attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
of the graph's adjacency matrix. The first one is the row index pointer. The
second one is the column indices. The third one is the edge IDs, which can be empty
(i.e. with 0 elements) to represent consecutive integer IDs starting from 0.
- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
of the graph's adjacency matrix. The first one is the column index pointer. The
second one is the row indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
The tensors can be replaced with any iterable of integers (e.g. list, tuple,
numpy.ndarray).
num_nodes_dict : dict[str, int], optional
The number of nodes for some node types, which is a dictionary mapping a node type
:math:`T` to the number of :math:`T`-typed nodes. If not given for a node type
......@@ -320,8 +347,9 @@ def heterograph(data_dict,
raise DGLError("dgl.heterograph no longer supports graph construction from a NetworkX "
"graph, use dgl.from_networkx instead.")
is_bipartite = (sty != dty)
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=is_bipartite)
node_tensor_dict[(sty, ety, dty)] = (u, v)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
data, idtype, bipartite=is_bipartite)
node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
if need_infer:
num_nodes_dict[sty] = max(num_nodes_dict[sty], urange)
num_nodes_dict[dty] = max(num_nodes_dict[dty], vrange)
......@@ -340,8 +368,8 @@ def heterograph(data_dict,
num_nodes_per_type = utils.toindex([num_nodes_dict[ntype] for ntype in ntypes], "int64")
rel_graphs = []
for srctype, etype, dsttype in relations:
src, dst = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(src, dst, srctype, etype, dsttype,
sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(sparse_fmt, arrays, srctype, etype, dsttype,
num_nodes_dict[srctype], num_nodes_dict[dsttype])
rel_graphs.append(g)
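
Because the dispatch now happens per relation, ``dgl.heterograph`` accepts a mix of input formats across relations, as the updated tests later in this diff also exercise. A small illustrative sketch (assuming this patch; the node and edge values are made up):

import dgl

g = dgl.heterograph({
    ('user', 'follows', 'user'): ([0, 1], [1, 2]),                         # COO pair
    ('user', 'plays', 'game'): ('csr', ([0, 1, 2, 4], [0, 0, 0, 1], [])),  # CSR triplet
})
print(g.num_edges('plays'))   # 4
print(g.num_nodes('game'))    # 2, inferred as max(indices) + 1
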
......@@ -368,8 +396,18 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
this format "tuple of node-tensors". The tensors should have the same data type,
which must be either int32 or int64. They should also have the same device context
(see below the descriptions of :attr:`idtype` and :attr:`device`).
- ``(iterable[int], iterable[int])``: Similar to the tuple of node-tensors
format, but stores node IDs in two sequences (e.g. list, tuple, numpy.ndarray).
- ``('coo', (Tensor, Tensor))``: Same as ``(Tensor, Tensor)``.
- ``('csr', (Tensor, Tensor, Tensor))``: The three tensors form the CSR representation
of the graph's adjacency matrix. The first one is the row index pointer. The
second one is the column indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
- ``('csc', (Tensor, Tensor, Tensor))``: The three tensors form the CSC representation
of the graph's adjacency matrix. The first one is the column index pointer. The
second one is the row indices. The third one is the edge IDs, which can be empty
to represent consecutive integer IDs starting from 0.
The tensors can be replaced with any iterable of integers (e.g. list, tuple,
numpy.ndarray).
If you would like to create a MFG with a single source node type, a single destination
node type, and a single edge type, then you can pass in the graph data directly
......@@ -489,8 +527,9 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
# Convert all data to node tensors first
node_tensor_dict = {}
for (sty, ety, dty), data in data_dict.items():
u, v, urange, vrange = utils.graphdata2tensors(data, idtype, bipartite=True)
node_tensor_dict[(sty, ety, dty)] = (u, v)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
data, idtype, bipartite=True)
node_tensor_dict[(sty, ety, dty)] = (sparse_fmt, arrays)
if need_infer:
num_src_nodes[sty] = max(num_src_nodes[sty], urange)
num_dst_nodes[dty] = max(num_dst_nodes[dty], vrange)
......@@ -525,8 +564,8 @@ def create_block(data_dict, num_src_nodes=None, num_dst_nodes=None, idtype=None,
meta_edges_src.append(srctype_dict[srctype])
meta_edges_dst.append(dsttype_dict[dsttype])
etypes.append(etype)
src, dst = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(src, dst, 'SRC/' + srctype, etype, 'DST/' + dsttype,
sparse_fmt, arrays = node_tensor_dict[(srctype, etype, dsttype)]
g = create_from_edges(sparse_fmt, arrays, 'SRC/' + srctype, etype, 'DST/' + dsttype,
num_src_nodes[srctype], num_dst_nodes[dsttype])
rel_graphs.append(g)
......@@ -1041,8 +1080,8 @@ def from_scipy(sp_mat,
raise DGLError('Expect the number of rows to be the same as the number of columns for '
'sp_mat, got {:d} and {:d}.'.format(num_rows, num_cols))
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(sp_mat, idtype)
g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange)
if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data)
return g.to(device)
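
With ``graphdata2tensors`` returning a ``SparseAdjTuple``, ``from_scipy`` now passes CSR/CSC matrices straight through instead of first converting their index structure to COO. A hedged usage sketch (assuming this patch; the matrix size and density are arbitrary):

import dgl
import scipy.sparse as ssp

spmat = ssp.random(10, 10, density=0.2, format='csr')
g = dgl.from_scipy(spmat, eweight_name='w')      # CSR arrays are reused directly
print(g.num_nodes(), g.num_edges())              # 10, plus the number of nonzeros
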
......@@ -1135,9 +1174,8 @@ def bipartite_from_scipy(sp_mat,
heterograph
bipartite_from_networkx
"""
# Sanity check
u, v, urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(sp_mat, idtype, bipartite=True)
g = create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange)
if eweight_name is not None:
g.edata[eweight_name] = F.tensor(sp_mat.data)
return g.to(device)
......@@ -1255,10 +1293,10 @@ def from_networkx(nx_graph,
if not nx_graph.is_directed():
nx_graph = nx_graph.to_directed()
u, v, urange, vrange = utils.graphdata2tensors(
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
nx_graph, idtype, edge_id_attr_name=edge_id_attr_name)
g = create_from_edges(u, v, '_N', '_E', '_N', urange, vrange)
g = create_from_edges(sparse_fmt, arrays, '_N', '_E', '_N', urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict
has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
......@@ -1450,12 +1488,12 @@ def bipartite_from_networkx(nx_graph,
bottom_map = {n : i for i, n in enumerate(bottom_nodes)}
# Get the node tensors and the number of nodes
u, v, urange, vrange = utils.graphdata2tensors(
(sparse_fmt, arrays), urange, vrange = utils.graphdata2tensors(
nx_graph, idtype, bipartite=True,
edge_id_attr_name=edge_id_attr_name,
top_map=top_map, bottom_map=bottom_map)
g = create_from_edges(u, v, utype, etype, vtype, urange, vrange)
g = create_from_edges(sparse_fmt, arrays, utype, etype, vtype, urange, vrange)
# nx_graph.edges(data=True) returns src, dst, attr_dict
has_edge_id = nx_graph.number_of_edges() > 0 and edge_id_attr_name is not None
......@@ -1586,10 +1624,9 @@ DGLHeteroGraph.to_networkx = to_networkx
# Internal APIs
############################################################
def create_from_edges(u, v,
def create_from_edges(sparse_fmt, arrays,
utype, etype, vtype,
urange, vrange,
validate=True,
row_sorted=False,
col_sorted=False):
"""Internal function to create a graph from incident nodes with types.
......@@ -1598,10 +1635,10 @@ def create_from_edges(u, v,
Parameters
----------
u : Tensor
Source node IDs.
v : Tensor
Dest node IDs.
sparse_fmt : str
The sparse adjacency matrix format.
arrays : tuple[Tensor]
The sparse adjacency matrix arrays.
utype : str
Source node type name.
etype : str
......@@ -1614,8 +1651,6 @@ def create_from_edges(u, v,
vrange : int, optional
The destination node ID range. If None, the value is the
maximum of the destination node IDs in the edge list plus 1. (Default: None)
validate : bool, optional
If True, checks if node IDs are within range.
row_sorted : bool, optional
Whether or not the rows of the COO are in ascending order.
col_sorted : bool, optional
......@@ -1627,24 +1662,21 @@ def create_from_edges(u, v,
-------
DGLHeteroGraph
"""
if validate:
if urange is not None and len(u) > 0 and \
urange <= F.as_scalar(F.max(u, dim=0)):
raise DGLError('Invalid node id {} (should be less than cardinality {}).'.format(
urange, F.as_scalar(F.max(u, dim=0))))
if vrange is not None and len(v) > 0 and \
vrange <= F.as_scalar(F.max(v, dim=0)):
raise DGLError('Invalid node id {} (should be less than cardinality {}).'.format(
vrange, F.as_scalar(F.max(v, dim=0))))
if utype == vtype:
num_ntypes = 1
else:
num_ntypes = 2
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
row_sorted, col_sorted)
if sparse_fmt == 'coo':
u, v = arrays
hgidx = heterograph_index.create_unitgraph_from_coo(
num_ntypes, urange, vrange, u, v, ['coo', 'csr', 'csc'],
row_sorted, col_sorted)
else: # 'csr' or 'csc'
indptr, indices, eids = arrays
hgidx = heterograph_index.create_unitgraph_from_csr(
num_ntypes, urange, vrange, indptr, indices, eids, ['coo', 'csr', 'csc'],
sparse_fmt == 'csc')
if utype == vtype:
return DGLHeteroGraph(hgidx, [utype], [etype])
else:
......
......@@ -6,7 +6,6 @@ import numpy as np
from .utils import save_graphs, load_graphs, _get_dgl_url
from ..convert import heterograph
from ..utils import graphdata2tensors
from .dgl_dataset import DGLBuiltinDataset
from .. import backend as F
......@@ -106,8 +105,9 @@ class FraudDataset(DGLBuiltinDataset):
graph_data = {}
for relation in self.relations[self.name]:
u, v, _, _ = graphdata2tensors(data[relation])
graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (u, v)
adj = data[relation].tocoo()
row, col = adj.row, adj.col
graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (row, col)
g = heterograph(graph_data)
g.ndata['feature'] = F.tensor(node_features)
......
......@@ -69,9 +69,14 @@ class DGLHeteroGraph(object):
if not isinstance(gidx, heterograph_index.HeteroGraphIndex):
dgl_warning('Recommend creating graphs by `dgl.graph(data)`'
' instead of `dgl.DGLGraph(data)`.')
u, v, num_src, num_dst = utils.graphdata2tensors(gidx)
gidx = heterograph_index.create_unitgraph_from_coo(
1, num_src, num_dst, u, v, ['coo', 'csr', 'csc'])
(sparse_fmt, arrays), num_src, num_dst = utils.graphdata2tensors(gidx)
if sparse_fmt == 'coo':
gidx = heterograph_index.create_unitgraph_from_coo(
1, num_src, num_dst, arrays[0], arrays[1], ['coo', 'csr', 'csc'])
else:
gidx = heterograph_index.create_unitgraph_from_csr(
1, num_src, num_dst, arrays[0], arrays[1], arrays[2], ['coo', 'csr', 'csc'],
sparse_fmt == 'csc')
if len(deprecate_kwargs) != 0:
dgl_warning('Keyword arguments {} are deprecated in v0.5, and can be safely'
' removed in all cases.'.format(list(deprecate_kwargs.keys())))
......@@ -3506,23 +3511,23 @@ class DGLHeteroGraph(object):
else:
return deg
def adjacency_matrix(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
def adjacency_matrix(self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Alias of :meth:`adj`"""
return self.adj(transpose, ctx, scipy_fmt, etype)
def adj(self, transpose=True, ctx=F.cpu(), scipy_fmt=None, etype=None):
def adj(self, transpose=False, ctx=F.cpu(), scipy_fmt=None, etype=None):
"""Return the adjacency matrix of edges of the given edge type.
By default, a row of returned adjacency matrix represents the
destination of an edge and the column represents the source.
source of an edge and the column represents the destination.
When transpose is True, a row represents the source and a column
represents a destination.
When transpose is True, a row represents the destination and a column
represents the source.
Parameters
----------
transpose : bool, optional
A flag to transpose the returned adjacency matrix. (Default: True)
A flag to transpose the returned adjacency matrix. (Default: False)
ctx : context, optional
The context of returned adjacency matrix. (Default: cpu)
scipy_fmt : str, optional
......@@ -3578,8 +3583,52 @@ class DGLHeteroGraph(object):
else:
return self._graph.adjacency_matrix_scipy(etid, transpose, scipy_fmt, False)
def adj_sparse(self, fmt, etype=None):
"""Return the adjacency matrix of edges of the given edge type as tensors of
a sparse matrix representation.
By default, a row of returned adjacency matrix represents the
source of an edge and the column represents the destination.
Parameters
----------
fmt : str
Either ``coo``, ``csr`` or ``csc``.
etype : str or (str, str, str), optional
The type names of the edges. The allowed type name formats are:
* ``(str, str, str)`` for source node type, edge type and destination node type.
* or one ``str`` edge type name if the name can uniquely identify a
triplet format in the graph.
Can be omitted if the graph has only one type of edges.
Returns
-------
tuple[Tensor]
If :attr:`fmt` is ``coo``, returns a pair of source and destination node ID
tensors.
If :attr:`fmt` is ``csr`` or ``csc``, return the CSR or CSC representation
of the adjacency matrix as a triplet of tensors
``(indptr, indices, edge_ids)``. Note that ``edge_ids`` may be an empty
tensor with 0 elements, in which case the edge IDs are consecutive
integers starting from 0.
Examples
--------
>>> g = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g.adj_sparse('coo')
>>> g.adj_sparse('csr')
"""
etid = self.get_etype_id(etype)
if fmt == 'csc':
# The first two elements are number of rows and columns
return self._graph.adjacency_matrix_tensors(etid, True, 'csr')[2:]
else:
return self._graph.adjacency_matrix_tensors(etid, False, fmt)[2:]
def adjacency_matrix_scipy(self, transpose=True, fmt='csr', return_edge_ids=None):
def adjacency_matrix_scipy(self, transpose=False, fmt='csr', return_edge_ids=None):
"""DEPRECATED: please use ``dgl.adjacency_matrix(transpose, scipy_fmt=fmt)``.
"""
dgl_warning('DGLGraph.adjacency_matrix_scipy is deprecated. '
......
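
The new ``adj_sparse`` method above returns the raw sparse arrays rather than a SciPy or framework sparse matrix. A short hedged sketch of its behavior on a toy graph (PyTorch backend assumed; as documented, the edge-ID tensor may come back empty, meaning consecutive IDs):

import dgl

g = dgl.graph(([0, 1, 2], [1, 2, 3]))
src, dst = g.adj_sparse('coo')               # source and destination node IDs
indptr, indices, eids = g.adj_sparse('csr')
print(indptr.tolist())                       # [0, 1, 2, 3, 3]: node 3 has no out-edge
print(indices.tolist())                      # [1, 2, 3]
print(eids.tolist())                         # possibly [], i.e. edge IDs 0, 1, 2 in this order
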
......@@ -600,11 +600,11 @@ class HeteroGraphIndex(ObjectBase):
def adjacency_matrix(self, etype, transpose, ctx):
"""Return the adjacency matrix representation of this graph.
By default, a row of returned adjacency matrix represents the destination
of an edge and the column represents the source.
By default, a row of returned adjacency matrix represents the source
of an edge and the column represents the destination.
When transpose is True, a row represents the source and a column represents
a destination.
When transpose is True, a row represents the destination and a column represents
the source.
Parameters
----------
......@@ -630,8 +630,8 @@ class HeteroGraphIndex(ObjectBase):
rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)
# convert to framework-specific sparse matrix
srctype, dsttype = self.metagraph.find_edge(etype)
nrows = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
ncols = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
nrows = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
ncols = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
nnz = self.number_of_edges(etype)
if fmt == "csr":
indptr = F.copy_to(F.from_dgl_nd(rst(0)), ctx)
......@@ -653,11 +653,11 @@ class HeteroGraphIndex(ObjectBase):
def adjacency_matrix_tensors(self, etype, transpose, fmt):
"""Return the adjacency matrix as a triplet of tensors.
By default, a row of returned adjacency matrix represents the destination
of an edge and the column represents the source.
By default, a row of returned adjacency matrix represents the source
of an edge and the column represents the destination.
When transpose is True, a row represents the source and a column represents
a destination.
When transpose is True, a row represents the destination and a column represents
the source.
Parameters
----------
......@@ -689,8 +689,8 @@ class HeteroGraphIndex(ObjectBase):
rst = _CAPI_DGLHeteroGetAdj(self, int(etype), transpose, fmt)
srctype, dsttype = self.metagraph.find_edge(etype)
nrows = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
ncols = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
nrows = self.number_of_nodes(dsttype) if transpose else self.number_of_nodes(srctype)
ncols = self.number_of_nodes(srctype) if transpose else self.number_of_nodes(dsttype)
nnz = self.number_of_edges(etype)
if fmt == "csr":
indptr = F.from_dgl_nd(rst(0))
......@@ -919,9 +919,9 @@ class HeteroGraphIndex(ObjectBase):
The first element of the tuple is the shuffle order for outward graph
The second element of the tuple is the shuffle order for inward graph
"""
csr = _CAPI_DGLHeteroGetAdj(self, int(etype), True, "csr")
csr = _CAPI_DGLHeteroGetAdj(self, int(etype), False, "csr")
order = csr(2)
rev_csr = _CAPI_DGLHeteroGetAdj(self, int(etype), False, "csr")
rev_csr = _CAPI_DGLHeteroGetAdj(self, int(etype), True, "csr")
rev_order = rev_csr(2)
return utils.toindex(order, self.dtype), utils.toindex(rev_order, self.dtype)
......@@ -1106,7 +1106,7 @@ def create_unitgraph_from_coo(num_ntypes, num_src, num_dst, row, col,
formats, row_sorted, col_sorted)
def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edge_ids,
formats):
formats, transpose=False):
"""Create a unitgraph graph index from CSR format
Parameters
......@@ -1125,6 +1125,8 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
Edge shuffle id.
formats : str
Restrict the storage formats allowed for the unit graph.
transpose : bool, optional
If True, treats the input matrix as CSC.
Returns
-------
......@@ -1135,7 +1137,7 @@ def create_unitgraph_from_csr(num_ntypes, num_src, num_dst, indptr, indices, edg
return _CAPI_DGLHeteroCreateUnitGraphFromCSR(
int(num_ntypes), int(num_src), int(num_dst),
F.to_dgl_nd(indptr), F.to_dgl_nd(indices), F.to_dgl_nd(edge_ids),
formats)
formats, transpose)
def create_heterograph_from_relations(metagraph, rel_graphs, num_nodes_per_type):
"""Create a heterograph from metagraph and graphs of every relation.
......
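
On the index layer, a CSC input is routed through the same entry point with the new ``transpose`` flag, which the C API maps to ``CreateFromCSC``. A hedged sketch of that internal call, mirroring the dispatch added in ``convert.py`` (internal API, subject to change):

import torch
from dgl import heterograph_index

indptr = torch.tensor([0, 1, 2, 3, 3])
indices = torch.tensor([1, 2, 3])
eids = torch.tensor([], dtype=torch.int64)
# rows are treated as sources (plain CSR) ...
g_csr = heterograph_index.create_unitgraph_from_csr(
    1, 4, 4, indptr, indices, eids, ['coo', 'csr', 'csc'])
# ... or as destinations (CSC) when transpose=True
g_csc = heterograph_index.create_unitgraph_from_csr(
    1, 4, 4, indptr, indices, eids, ['coo', 'csr', 'csc'], transpose=True)
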
......@@ -954,7 +954,7 @@ def khop_adj(g, k):
"""
assert g.is_homogeneous, \
'only homogeneous graph is supported'
adj_k = g.adj(scipy_fmt=g.formats()['created'][0]) ** k
adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
return F.tensor(adj_k.todense().astype(np.float32))
def khop_graph(g, k, copy_ndata=True):
......@@ -1024,7 +1024,7 @@ def khop_graph(g, k, copy_ndata=True):
assert g.is_homogeneous, \
'only homogeneous graph is supported'
n = g.number_of_nodes()
adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
adj_k = g.adj(transpose=False, scipy_fmt=g.formats()['created'][0]) ** k
adj_k = adj_k.tocoo()
multiplicity = adj_k.data
row = np.repeat(adj_k.row, multiplicity)
......@@ -1280,7 +1280,7 @@ def laplacian_lambda_max(g):
rst = []
for g_i in g_arr:
n = g_i.number_of_nodes()
adj = g_i.adj(scipy_fmt=g_i.formats()['created'][0]).astype(float)
adj = g_i.adj(transpose=True, scipy_fmt=g_i.formats()['created'][0]).astype(float)
norm = sparse.diags(F.asnumpy(g_i.in_degrees()).clip(1) ** -0.5, dtype=float)
laplacian = sparse.eye(n) - norm * adj * norm
rst.append(sparse.linalg.eigs(laplacian, 1, which='LM',
......@@ -1336,7 +1336,7 @@ def metapath_reachable_graph(g, metapath):
"""
adj = 1
for etype in metapath:
adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=True)
adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=False)
adj = (adj != 0).tocsr()
srctype = g.to_canonical_etype(metapath[0])[0]
......@@ -2845,12 +2845,12 @@ def sort_in_edges(g, tag, tag_offset_name='_TAG_OFFSET'):
-----------
>>> g = dgl.graph(([0,1,2,3,4,0,1,2],[0,0,0,0,0,1,1,1]))
>>> g.adjacency_matrix(scipy_fmt='csr', transpose=False).nonzero()
>>> g.adjacency_matrix(scipy_fmt='csr', transpose=True).nonzero()
(array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
array([0, 1, 2, 3, 4, 0, 1, 2], dtype=int32)))
>>> tag = torch.IntTensor([1,1,0,2,0])
>>> g_sorted = dgl.transform.sort_in_edges(g, tag)
>>> g_sorted.adjacency_matrix(scipy_fmt='csr', transpose=False).nonzero()
>>> g_sorted.adjacency_matrix(scipy_fmt='csr', transpose=True).nonzero()
(array([0, 0, 0, 0, 0, 1, 1, 1], dtype=int32),
array([2, 4, 0, 1, 3, 2, 0, 1], dtype=int32))
>>> g_sorted.ndata['_TAG_OFFSET']
......
"""Data utilities."""
from collections import namedtuple
import scipy as sp
import networkx as nx
......@@ -31,7 +32,9 @@ def elist2tensor(elist, idtype):
return F.tensor(u, idtype), F.tensor(v, idtype)
def scipy2tensor(spmat, idtype):
"""Function to convert a scipy matrix to edge tensors.
"""Function to convert a scipy matrix to a sparse adjacency matrix tuple.
Note that the data array of the scipy matrix is discarded.
Parameters
----------
......@@ -42,13 +45,20 @@ def scipy2tensor(spmat, idtype):
Returns
-------
(Tensor, Tensor)
Edge tensors.
(str, tuple[Tensor])
A tuple containing the format as well as the list of tensors representing
the sparse matrix.
"""
spmat = spmat.tocoo()
row = F.tensor(spmat.row, idtype)
col = F.tensor(spmat.col, idtype)
return row, col
if spmat.format in ['csr', 'csc']:
indptr = F.tensor(spmat.indptr, idtype)
indices = F.tensor(spmat.indices, idtype)
data = F.tensor([], idtype)
return SparseAdjTuple(spmat.format, (indptr, indices, data))
else:
spmat = spmat.tocoo()
row = F.tensor(spmat.row, idtype)
col = F.tensor(spmat.col, idtype)
return SparseAdjTuple('coo', (row, col))
def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None):
"""Function to convert a networkx graph to edge tensors.
......@@ -96,6 +106,8 @@ def networkx2tensor(nx_graph, idtype, edge_id_attr_name=None):
dst = F.tensor(dst, idtype)
return src, dst
SparseAdjTuple = namedtuple('SparseAdjTuple', ['format', 'arrays'])
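
For reference, a self-contained sketch (a hypothetical standalone helper, not DGL's own ``scipy2tensor``) of the conversion that now produces a ``SparseAdjTuple``: CSR/CSC matrices keep their native layout, anything else falls back to COO, and the data array is dropped.

import scipy.sparse as ssp
import torch

def scipy_to_adj_tuple(spmat):
    """Return (format, arrays) for a SciPy sparse matrix; the data array is discarded."""
    if spmat.format in ('csr', 'csc'):
        return spmat.format, (torch.as_tensor(spmat.indptr),
                              torch.as_tensor(spmat.indices),
                              torch.tensor([], dtype=torch.int64))
    coo = spmat.tocoo()
    return 'coo', (torch.as_tensor(coo.row), torch.as_tensor(coo.col))

fmt, arrays = scipy_to_adj_tuple(ssp.random(5, 5, density=0.4, format='csr'))
print(fmt, [tuple(a.shape) for a in arrays])   # e.g. csr [(6,), (10,), (0,)]
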
def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
"""Function to convert various types of data to edge tensors and infer
the number of nodes.
......@@ -103,7 +115,14 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
Parameters
----------
data : graph data
Various kinds of graph data.
Various kinds of graph data. Possible data types are:
- ``(row, col)``
- ``('coo', (row, col))``
- ``('csr', (indptr, indices, edge_ids))``
- ``('csc', (indptr, indices, edge_ids))``
- SciPy sparse matrix
- NetworkX graph
idtype : int32, int64, optional
Integer ID type. If None, try infer from the data and if fail use
int64.
......@@ -121,55 +140,52 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
Returns
-------
src : Tensor
Src nodes.
dst : Tensor
Dst nodes.
data : SparseAdjTuple
A tuple with the sparse matrix format and the adjacency matrix tensors.
num_src : int
Number of source nodes
Number of source nodes.
num_dst : int
Number of destination nodes.
"""
if idtype is None and not (isinstance(data, tuple) and F.is_tensor(data[0])):
# Convert tuple to SparseAdjTuple
if isinstance(data, tuple):
if not isinstance(data[0], str):
# (row, col) format, convert to ('coo', (row, col))
data = ('coo', data)
data = SparseAdjTuple(*data)
if idtype is None and \
not (isinstance(data, SparseAdjTuple) and F.is_tensor(data.arrays[0])):
# preferred default idtype is int64
# if data is tensor and idtype is None, infer the idtype from tensor
idtype = F.int64
checks.check_valid_idtype(idtype)
if isinstance(data, tuple) and (not F.is_tensor(data[0]) or not F.is_tensor(data[1])):
if isinstance(data, SparseAdjTuple) and (not all(F.is_tensor(a) for a in data.arrays)):
# The arrays are iterables (e.g. lists) rather than tensors; convert each to a tensor
if len(data[0]) == 0:
if len(data.arrays[0]) == 0:
# force idtype for empty list
data = F.tensor(data[0], idtype), F.tensor(data[1], idtype)
data = SparseAdjTuple(data.format, tuple(F.tensor(a, idtype) for a in data.arrays))
else:
# convert the iterable to tensor and keep its native data type so we can check
# its validity later
data = F.tensor(data[0]), F.tensor(data[1])
data = SparseAdjTuple(data.format, tuple(F.tensor(a) for a in data.arrays))
if isinstance(data, tuple):
# (Tensor, Tensor) type data
src, dst = data
# sanity checks
# TODO(minjie): move these checks to C for faster graph construction.
if F.dtype(src) != F.dtype(dst):
raise DGLError('Expect the source and destination node IDs to have the same type,'
' but got {} and {}.'.format(F.dtype(src), F.dtype(dst)))
if F.context(src) != F.context(dst):
raise DGLError('Expect the source and destination node IDs to be on the same device,'
' but got {} and {}.'.format(F.context(src), F.context(dst)))
if F.dtype(src) not in (F.int32, F.int64):
raise DGLError('Expect the source ID tensor to have data type int32 or int64,'
' but got {}.'.format(F.dtype(src)))
if F.dtype(dst) not in (F.int32, F.int64):
raise DGLError('Expect the destination ID tensor to have data type int32 or int64,'
' but got {}.'.format(F.dtype(dst)))
if isinstance(data, SparseAdjTuple):
if idtype is not None:
src, dst = F.astype(src, idtype), F.astype(dst, idtype)
data = SparseAdjTuple(data.format, tuple(F.astype(a, idtype) for a in data.arrays))
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
elif isinstance(data, list):
src, dst = elist2tensor(data, idtype)
data = SparseAdjTuple('coo', (src, dst))
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
elif isinstance(data, sp.sparse.spmatrix):
src, dst = scipy2tensor(data, idtype)
# We can get scipy matrix's number of rows and columns easily.
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
data = scipy2tensor(data, idtype)
elif isinstance(data, nx.Graph):
# We can get networkx graph's number of sources and destinations easily.
num_src, num_dst = infer_num_nodes(data, bipartite=bipartite)
edge_id_attr_name = kwargs.get('edge_id_attr_name', None)
if bipartite:
top_map = kwargs.get('top_map')
......@@ -180,22 +196,11 @@ def graphdata2tensors(data, idtype=None, bipartite=False, **kwargs):
else:
src, dst = networkx2tensor(
data, idtype, edge_id_attr_name=edge_id_attr_name)
data = SparseAdjTuple('coo', (src, dst))
else:
raise DGLError('Unsupported graph data type:', type(data))
if len(src) != len(dst):
raise DGLError('Expect the source and destination ID tensors to have the same length,'
' but got {} and {}.'.format(len(src), len(dst)))
if len(src) > 0 and (F.as_scalar(F.min(src, 0)) < 0 or F.as_scalar(F.min(dst, 0)) < 0):
raise DGLError('All IDs must be non-negative integers.')
# infer number of nodes
infer_from_raw = infer_num_nodes(data, bipartite=bipartite)
if infer_from_raw is None:
num_src, num_dst = infer_num_nodes((src, dst), bipartite=bipartite)
else:
num_src, num_dst = infer_from_raw
return src, dst, num_src, num_dst
return data, num_src, num_dst
def networkxbipartite2tensors(nx_graph, idtype, top_map, bottom_map, edge_id_attr_name=None):
"""Function to convert a networkx bipartite to edge tensors.
......@@ -264,9 +269,11 @@ def infer_num_nodes(data, bipartite=False):
----------
data : graph data
Supported types are:
* Tensor pair (u, v)
* SciPy matrix
* NetworkX graph
* SparseTuple ``(sparse_fmt, arrays)`` where ``arrays`` can be either ``(src, dst)`` or
``(indptr, indices, data)``.
* SciPy matrix.
* NetworkX graph.
bipartite : bool, optional
Whether infer number of nodes of a bipartite graph --
num_src and num_dst can be different.
......@@ -283,10 +290,27 @@ def infer_num_nodes(data, bipartite=False):
None
If the inference failed.
"""
if isinstance(data, tuple) and len(data) == 2 and F.is_tensor(data[0]):
u, v = data
nsrc = F.as_scalar(F.max(u, dim=0)) + 1 if len(u) > 0 else 0
ndst = F.as_scalar(F.max(v, dim=0)) + 1 if len(v) > 0 else 0
if isinstance(data, tuple) and len(data) == 2:
if not isinstance(data[0], str):
raise TypeError('Expected sparse format as a str, but got %s' % type(data[0]))
if data[0] == 'coo':
# ('coo', (src, dst)) format
u, v = data[1]
nsrc = F.as_scalar(F.max(u, dim=0)) + 1 if len(u) > 0 else 0
ndst = F.as_scalar(F.max(v, dim=0)) + 1 if len(v) > 0 else 0
elif data[0] == 'csr':
# ('csr', (indptr, indices, eids)) format
indptr, indices, _ = data[1]
nsrc = F.shape(indptr)[0] - 1
ndst = F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0
elif data[0] == 'csc':
# ('csc', (indptr, indices, eids)) format
indptr, indices, _ = data[1]
ndst = F.shape(indptr)[0] - 1
nsrc = F.as_scalar(F.max(indices, dim=0)) + 1 if len(indices) > 0 else 0
else:
raise ValueError('unknown format %s' % data[0])
elif isinstance(data, sp.sparse.spmatrix):
nsrc, ndst = data.shape[0], data.shape[1]
elif isinstance(data, nx.Graph):
......
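
The inference rules added above can be checked by hand: for CSR the number of source nodes is the length of ``indptr`` minus one, and the number of destination nodes is the largest column index plus one (CSC swaps the two). A plain-Python worked example, using the indptr/indices from the ``dgl.graph`` docstring:

indptr = [0, 0, 0, 1, 2, 3]
indices = [1, 2, 3]
num_src = len(indptr) - 1                        # 5 rows in the CSR matrix
num_dst = max(indices) + 1 if indices else 0     # 4 = largest column index plus one
print(num_src, num_dst)                          # 5 4
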
......@@ -52,14 +52,20 @@ DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateUnitGraphFromCSR")
IdArray indices = args[4];
IdArray edge_ids = args[5];
List<Value> formats = args[6];
bool transpose = args[7];
std::vector<SparseFormat> formats_vec;
for (Value val : formats) {
std::string fmt = val->data;
formats_vec.push_back(ParseSparseFormat(fmt));
}
const auto code = SparseFormatsToCode(formats_vec);
auto hgptr = CreateFromCSR(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
if (!transpose) {
auto hgptr = CreateFromCSR(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
} else {
auto hgptr = CreateFromCSC(nvtypes, num_src, num_dst, indptr, indices, edge_ids, code);
*rv = HeteroGraphRef(hgptr);
}
});
DGL_REGISTER_GLOBAL("heterograph_index._CAPI_DGLHeteroCreateHeteroGraph")
......
......@@ -456,6 +456,8 @@ class UnitGraph::CSR : public BaseHeteroGraph {
if (aten::IsValidIdArray(edge_ids))
CHECK((indices->shape[0] == edge_ids->shape[0]) || aten::IsNullArray(edge_ids))
<< "edge id arrays should have the same length as indices if not empty";
CHECK_EQ(num_src, indptr->shape[0] - 1)
<< "number of nodes do not match the length of indptr minus 1.";
adj_ = aten::CSRMatrix{num_src, num_dst, indptr, indices, edge_ids};
}
......@@ -1070,10 +1072,10 @@ std::vector<IdArray> UnitGraph::GetAdj(
// to_scipy_sparse_matrix. With the upcoming custom kernel change, we should change the
// behavior and make row for src and col for dst.
if (fmt == std::string("csr")) {
return transpose? GetOutCSR()->GetAdj(etype, false, "csr")
return !transpose ? GetOutCSR()->GetAdj(etype, false, "csr")
: GetInCSR()->GetAdj(etype, false, "csr");
} else if (fmt == std::string("coo")) {
return GetCOO()->GetAdj(etype, !transpose, fmt);
return GetCOO()->GetAdj(etype, transpose, fmt);
} else {
LOG(FATAL) << "unsupported adjacency matrix format: " << fmt;
return {};
......
......@@ -33,7 +33,7 @@ def test_csrmm(idtype, dtype):
a, A = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
b, B = _random_simple_graph(idtype, dtype, F.ctx(), 600, 700, 9000, 'B', 'C', 'BC')
C, C_weights = dgl.sparse._csrmm(A._graph, A.edata['w'], B._graph, B.edata['w'], 2)
C_adj = C.adjacency_matrix_scipy(0, True, 'csr')
C_adj = C.adjacency_matrix_scipy(0, False, 'csr')
C_adj.data = F.asnumpy(C_weights)
C_adj = F.tensor(C_adj.todense(), dtype=dtype)
c = F.tensor((a * b).todense(), dtype=dtype)
......@@ -83,7 +83,7 @@ def test_csrsum(idtype, dtype):
a, A = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
b, B = _random_simple_graph(idtype, dtype, F.ctx(), 500, 600, 9000, 'A', 'B', 'AB')
C, C_weights = dgl.sparse._csrsum([A._graph, B._graph], [A.edata['w'], B.edata['w']])
C_adj = C.adjacency_matrix_scipy(0, True, 'csr')
C_adj = C.adjacency_matrix_scipy(0, False, 'csr')
C_adj.data = F.asnumpy(C_weights)
C_adj = F.tensor(C_adj.todense(), dtype=dtype)
c = F.tensor((a + b).todense(), dtype=dtype)
......
......@@ -118,9 +118,9 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
def _test(g):
# test twice to see whether the cached format works or not
......@@ -192,9 +192,9 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
def _test_csr(g):
# test twice to see whether the cached format works or not
......@@ -253,8 +253,8 @@ def test_scipy_adjmat():
adj_1 = g.adj(scipy_fmt='coo')
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
adj_t0 = g.adj(transpose=True, scipy_fmt='csr')
adj_t_1 = g.adj(transpose=True, scipy_fmt='coo')
adj_t0 = g.adj(transpose=False, scipy_fmt='csr')
adj_t_1 = g.adj(transpose=False, scipy_fmt='coo')
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
def test_incmat():
......
......@@ -51,8 +51,8 @@ def create_test_heterograph2(idtype):
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1]),
('user', 'wishes', 'game'): ('csr', ([0, 1, 1, 2], [1, 0], [])),
('developer', 'develops', 'game'): ('csc', ([0, 1, 2], [0, 1], [0, 1])),
}, idtype=idtype, device=F.ctx())
assert g.idtype == idtype
assert g.device == F.ctx()
......@@ -488,53 +488,53 @@ def _test_edge_ids():
@parametrize_dtype
def test_adj(idtype):
g = create_test_heterograph(idtype)
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='follows'))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='follows'))
assert np.allclose(
adj,
np.array([[0., 0., 0.],
[1., 0., 0.],
[0., 1., 0.]]))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='follows'))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='follows'))
assert np.allclose(
adj,
np.array([[0., 1., 0.],
[0., 0., 1.],
[0., 0., 0.]]))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='plays'))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='plays'))
assert np.allclose(
adj,
np.array([[1., 1., 0.],
[0., 1., 1.]]))
adj = F.sparse_to_numpy(g.adj(transpose=True, etype='plays'))
adj = F.sparse_to_numpy(g.adj(transpose=False, etype='plays'))
assert np.allclose(
adj,
np.array([[1., 0.],
[1., 1.],
[0., 1.]]))
adj = g.adj(transpose=False, scipy_fmt='csr', etype='follows')
adj = g.adj(transpose=True, scipy_fmt='csr', etype='follows')
assert np.allclose(
adj.todense(),
np.array([[0., 0., 0.],
[1., 0., 0.],
[0., 1., 0.]]))
adj = g.adj(transpose=False, scipy_fmt='coo', etype='follows')
adj = g.adj(transpose=True, scipy_fmt='coo', etype='follows')
assert np.allclose(
adj.todense(),
np.array([[0., 0., 0.],
[1., 0., 0.],
[0., 1., 0.]]))
adj = g.adj(transpose=False, scipy_fmt='csr', etype='plays')
adj = g.adj(transpose=True, scipy_fmt='csr', etype='plays')
assert np.allclose(
adj.todense(),
np.array([[1., 1., 0.],
[0., 1., 1.]]))
adj = g.adj(transpose=False, scipy_fmt='coo', etype='plays')
adj = g.adj(transpose=True, scipy_fmt='coo', etype='plays')
assert np.allclose(
adj.todense(),
np.array([[1., 1., 0.],
[0., 1., 1.]]))
adj = F.sparse_to_numpy(g['follows'].adj(transpose=False))
adj = F.sparse_to_numpy(g['follows'].adj(transpose=True))
assert np.allclose(
adj,
np.array([[0., 0., 0.],
......@@ -2648,6 +2648,63 @@ def test_create_block(idtype):
assert hg.edges['AB'].data['x'] is eabx
assert hg.edges['BA'].data['x'] is ebax
@parametrize_dtype
@pytest.mark.parametrize('fmt', ['coo', 'csr', 'csc'])
def test_adj_sparse(idtype, fmt):
if fmt == 'coo':
A = ssp.random(10, 10, 0.2).tocoo()
A.data = np.arange(20)
row = F.tensor(A.row, idtype)
col = F.tensor(A.col, idtype)
g = dgl.graph((row, col))
elif fmt == 'csr':
A = ssp.random(10, 10, 0.2).tocsr()
A.data = np.arange(20)
indptr = F.tensor(A.indptr, idtype)
indices = F.tensor(A.indices, idtype)
g = dgl.graph(('csr', (indptr, indices, [])))
with pytest.raises(DGLError):
g2 = dgl.graph(('csr', (indptr[:-1], indices, [])), num_nodes=10)
elif fmt == 'csc':
A = ssp.random(10, 10, 0.2).tocsc()
A.data = np.arange(20)
indptr = F.tensor(A.indptr, idtype)
indices = F.tensor(A.indices, idtype)
g = dgl.graph(('csc', (indptr, indices, [])))
with pytest.raises(DGLError):
g2 = dgl.graph(('csr', (indptr[:-1], indices, [])), num_nodes=10)
A_coo = A.tocoo()
A_csr = A.tocsr()
A_csc = A.tocsc()
row, col = g.adj_sparse('coo')
assert np.array_equal(F.asnumpy(row), A_coo.row)
assert np.array_equal(F.asnumpy(col), A_coo.col)
indptr, indices, eids = g.adj_sparse('csr')
assert np.array_equal(F.asnumpy(indptr), A_csr.indptr)
if fmt == 'csr':
assert len(eids) == 0
assert np.array_equal(F.asnumpy(indices), A_csr.indices)
else:
indices_sorted = F.zeros(len(indices), idtype)
indices_sorted = F.scatter_row(indices_sorted, eids, indices)
indices_sorted_np = np.zeros(len(indices), dtype=A_csr.indices.dtype)
indices_sorted_np[A_csr.data] = A_csr.indices
assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np)
indptr, indices, eids = g.adj_sparse('csc')
assert np.array_equal(F.asnumpy(indptr), A_csc.indptr)
if fmt == 'csc':
assert len(eids) == 0
assert np.array_equal(F.asnumpy(indices), A_csc.indices)
else:
indices_sorted = F.zeros(len(indices), idtype)
indices_sorted = F.scatter_row(indices_sorted, eids, indices)
indices_sorted_np = np.zeros(len(indices), dtype=A_csc.indices.dtype)
indices_sorted_np[A_csc.data] = A_csc.indices
assert np.array_equal(F.asnumpy(indices_sorted), indices_sorted_np)
if __name__ == '__main__':
# test_create()
......
......@@ -62,8 +62,8 @@ def test_sort_with_tag(idtype):
assert(not check_sort(old_csr, tag)) # Check the original csr is not modified.
new_g = dgl.sort_in_edges(g, tag)
old_csc = g.adjacency_matrix(transpose=False, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=False, scipy_fmt='csr')
old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, tag, new_g.ndata["_TAG_OFFSET"]))
assert(not check_sort(old_csc, tag))
......@@ -83,8 +83,8 @@ def test_sort_with_tag_bipartite(idtype):
assert(not check_sort(old_csr, vtag))
new_g = dgl.sort_in_edges(g, utag)
old_csc = g.adjacency_matrix(transpose=False, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=False, scipy_fmt='csr')
old_csc = g.adjacency_matrix(transpose=True, scipy_fmt='csr')
new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt='csr')
assert(check_sort(new_csc, utag, new_g.nodes['_V'].data['_TAG_OFFSET']))
assert(not check_sort(old_csc, utag))
......
......@@ -67,7 +67,7 @@ def test_topological_nodes(idtype, n=100):
layers_dgl = dgl.topological_nodes_generator(g)
adjmat = g.adjacency_matrix(transpose=False)
adjmat = g.adjacency_matrix(transpose=True)
def tensor_topo_traverse():
n = g.number_of_nodes()
mask = F.copy_to(F.ones((n, 1)), F.cpu())
......
......@@ -134,7 +134,7 @@ def check_compute_func(worker_id, graph_name, return_dict):
in_feats = g.nodes[0].data['feat'].shape[1]
# Test update all.
g.update_all(fn.copy_src(src='feat', out='m'), fn.sum(msg='m', out='preprocess'))
adj = g.adjacency_matrix()
adj = g.adjacency_matrix(transpose=True)
tmp = F.spmm(adj, g.nodes[:].data['feat'])
assert_almost_equal(F.asnumpy(g.nodes[:].data['preprocess']), F.asnumpy(tmp))
g._sync_barrier(60)
......@@ -261,13 +261,13 @@ def check_mem(gidx, cond_v, shared_v):
cond_v.release()
gidx1 = dgl.graph_index.from_shared_mem_graph_index("test_graph5")
in_csr = gidx.adjacency_matrix_scipy(False, "csr")
out_csr = gidx.adjacency_matrix_scipy(True, "csr")
in_csr = gidx.adjacency_matrix_scipy(True, "csr")
out_csr = gidx.adjacency_matrix_scipy(False, "csr")
in_csr1 = gidx1.adjacency_matrix_scipy(False, "csr")
in_csr1 = gidx1.adjacency_matrix_scipy(True, "csr")
assert_array_equal(in_csr.indptr, in_csr1.indptr)
assert_array_equal(in_csr.indices, in_csr1.indices)
out_csr1 = gidx1.adjacency_matrix_scipy(True, "csr")
out_csr1 = gidx1.adjacency_matrix_scipy(False, "csr")
assert_array_equal(out_csr.indptr, out_csr1.indptr)
assert_array_equal(out_csr.indices, out_csr1.indices)
......
......@@ -25,7 +25,7 @@ def test_graph_conv(idtype, out_dim):
g = dgl.from_networkx(nx.path_graph(3))
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv.initialize(ctx=ctx)
......@@ -136,7 +136,7 @@ def _S2AXWb(A, N, X, W, b):
def test_tagconv(out_dim):
g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = mx.nd.power(g.in_degrees().astype('float32'), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True)
......@@ -317,7 +317,7 @@ def test_dense_cheb_conv(out_dim):
for k in range(1, 4):
ctx = F.ctx()
g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx())
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
cheb = nn.ChebConv(5, out_dim, k)
dense_cheb = nn.DenseChebConv(5, out_dim, k)
cheb.initialize(ctx=ctx)
......@@ -342,7 +342,7 @@ def test_dense_cheb_conv(out_dim):
def test_dense_graph_conv(idtype, g, norm_type, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
conv.initialize(ctx=ctx)
......@@ -362,7 +362,7 @@ def test_dense_graph_conv(idtype, g, norm_type, out_dim):
def test_dense_sage_conv(idtype, g, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
sage = nn.SAGEConv(5, out_dim, 'gcn')
dense_sage = nn.DenseSAGEConv(5, out_dim)
sage.initialize(ctx=ctx)
......
......@@ -24,7 +24,7 @@ def _AXWb(A, X, W, b):
def test_graph_conv0(out_dim):
g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv = conv.to(ctx)
......@@ -186,7 +186,7 @@ def test_tagconv(out_dim):
g = dgl.DGLGraph(nx.path_graph(3))
g = g.to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx)
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = th.pow(g.in_degrees().float(), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True)
......@@ -806,7 +806,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
# TODO(minjie): enable the following option after #1385
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv.weight.data = conv.weight.data
......@@ -824,7 +824,7 @@ def test_dense_graph_conv(norm_type, g, idtype, out_dim):
def test_dense_sage_conv(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
sage = nn.SAGEConv(5, out_dim, 'gcn')
dense_sage = nn.DenseSAGEConv(5, out_dim)
dense_sage.fc.weight.data = sage.fc_neigh.weight.data
......@@ -911,7 +911,7 @@ def test_dense_cheb_conv(out_dim):
ctx = F.ctx()
g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True)
g = g.to(F.ctx())
adj = g.adjacency_matrix(transpose=False, ctx=ctx).to_dense()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).to_dense()
cheb = nn.ChebConv(5, out_dim, k, None)
dense_cheb = nn.DenseChebConv(5, out_dim, k)
#for i in range(len(cheb.fc)):
......