"""Module for graph transformation utilities."""

from collections.abc import Iterable, Mapping
from collections import defaultdict
import numpy as np
from scipy import sparse

from ._ffi.function import _init_api
from .base import dgl_warning, DGLError
from . import convert
from .heterograph import DGLHeteroGraph, DGLBlock
from . import ndarray as nd
from . import backend as F
from . import utils, batch
from .partition import metis_partition_assignment
from .partition import partition_graph_with_halo
from .partition import metis_partition

# TO BE DEPRECATED
from ._deprecate.graph import DGLGraph as DGLGraphStale

__all__ = [
    'line_graph',
    'khop_adj',
    'khop_graph',
    'reverse',
    'to_bidirected',
    'to_bidirected_stale',
    'add_reverse_edges',
    'laplacian_lambda_max',
    'knn_graph',
    'segmented_knn_graph',
    'add_edges',
    'add_nodes',
    'remove_edges',
    'remove_nodes',
    'add_self_loop',
    'remove_self_loop',
    'metapath_reachable_graph',
    'compact_graphs',
    'to_block',
    'to_simple',
    'to_simple_graph',
    'as_immutable_graph',
    'metis_partition_assignment',
    'partition_graph_with_halo',
    'metis_partition',
    'as_heterograph']


def pairwise_squared_distance(x):
    """
    x : (n_samples, n_points, dims)
    return : (n_samples, n_points, n_points)
    """
    x2s = F.sum(x * x, -1, True)
    # assuming that __matmul__ is always implemented (true for PyTorch, MXNet and Chainer)
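    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, evaluated for every pair of points within each sample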
    return x2s + F.swapaxes(x2s, -1, -2) - 2 * x @ F.swapaxes(x, -1, -2)

#pylint: disable=invalid-name
def knn_graph(x, k):
    """Construct a graph from a set of points according to k-nearest-neighbor (KNN)
    and return.

    The function transforms the coordinates/features of a point set
    into a directed homogeneous graph. The coordinates of the point
    set are specified as a matrix whose rows correspond to points and
    columns correspond to coordinate/feature dimensions.

    The nodes of the returned graph correspond to the points, where the predecessors
    of each point are its k-nearest neighbors measured by the Euclidean distance.

    If :attr:`x` is a 3D tensor, then each submatrix will be transformed
    into a separate graph. DGL then composes the graphs into a large
    graph of multiple connected components.

    Parameters
    ----------
    x : Tensor
        The point coordinates. It can be either on CPU or GPU.

        * If :attr:`x` is 2D, ``x[i]`` corresponds to the i-th node in the KNN graph.

        * If :attr:`x` is 3D, ``x[i]`` corresponds to the i-th KNN graph and
          ``x[i][j]`` corresponds to the j-th node in the i-th KNN graph.
    k : int
        The number of nearest neighbors per node.

    Returns
    -------
    DGLGraph
        The constructed graph. The node IDs are in the same order as :attr:`x`.

        The returned graph is on CPU, regardless of the context of input :attr:`x`.

    Examples
    --------

    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch

    When :attr:`x` is a 2D tensor, a single KNN graph is constructed.

    >>> x = torch.tensor([[0.0, 0.0, 1.0],
    ...                   [1.0, 0.5, 0.5],
    ...                   [0.5, 0.2, 0.2],
    ...                   [0.3, 0.2, 0.4]])
    >>> knn_g = dgl.knn_graph(x, 2)  # Each node has two predecessors
    >>> knn_g.edges()
    (tensor([0, 1, 2, 2, 2, 3, 3, 3]), tensor([0, 1, 1, 2, 3, 0, 2, 3]))

    When :attr:`x` is a 3D tensor, DGL constructs multiple KNN graphs and
    then composes them into a graph of multiple connected components.

    >>> x1 = torch.tensor([[0.0, 0.0, 1.0],
    ...                    [1.0, 0.5, 0.5],
    ...                    [0.5, 0.2, 0.2],
    ...                    [0.3, 0.2, 0.4]])
    >>> x2 = torch.tensor([[0.0, 1.0, 1.0],
    ...                    [0.3, 0.3, 0.3],
    ...                    [0.4, 0.4, 1.0],
    ...                    [0.3, 0.8, 0.2]])
    >>> x = torch.stack([x1, x2], dim=0)
    >>> knn_g = dgl.knn_graph(x, 2)  # Each node has two predecessors
    >>> knn_g.edges()
    (tensor([0, 1, 2, 2, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 7]),
     tensor([0, 1, 1, 2, 3, 0, 2, 3, 4, 5, 6, 7, 4, 6, 5, 7]))
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
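    # For each point, take the indices of the k smallest squared distances; the point itself is
    # included since its self-distance is zero.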
    k_indices = F.argtopk(dist, k, 2, descending=False)
    dst = F.copy_to(k_indices, F.cpu())

    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    per_sample_offset = F.reshape(F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))),
        shape=(n_samples * n_points, n_samples * n_points))

    return convert.from_scipy(adj)

#pylint: disable=invalid-name
def segmented_knn_graph(x, k, segs):
    """Construct multiple graphs from multiple sets of points according to
    k-nearest-neighbor (KNN) and return.

    Compared with :func:`dgl.knn_graph`, this allows multiple point sets with
    different sizes. The points from different sets are stored contiguously
    in the :attr:`x` tensor.
    :attr:`segs` specifies the number of points in each point set. The
    function constructs a KNN graph for each point set, where the predecessors
    of each point are its k-nearest neighbors measured by the Euclidean distance.
    DGL then composes all KNN graphs
    into a graph with multiple connected components.

    Parameters
    ----------
    x : Tensor
        Coordinates/features of points. Must be 2D. It can be either on CPU or GPU.
    k : int
        The number of nearest neighbors per node.
    segs : list[int]
        Number of points in each point set. The numbers in :attr:`segs`
        must sum up to the number of rows in :attr:`x`.

    Returns
    -------
    DGLGraph
        The graph. The node IDs are in the same order as :attr:`x`.

        The returned graph is on CPU, regardless of the context of input :attr:`x`.

    Examples
    --------

    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch

    In the example below, the first point set has three points
    and the second point set has four points.

    >>> # Features/coordinates of the first point set
    >>> x1 = torch.tensor([[0.0, 0.5, 0.2],
    ...                    [0.1, 0.3, 0.2],
    ...                    [0.4, 0.2, 0.2]])
    >>> # Features/coordinates of the second point set
    >>> x2 = torch.tensor([[0.3, 0.2, 0.1],
    ...                    [0.5, 0.2, 0.3],
    ...                    [0.1, 0.1, 0.2],
    ...                    [0.6, 0.3, 0.3]])
    >>> x = torch.cat([x1, x2], dim=0)
    >>> segs = [x1.shape[0], x2.shape[0]]
    >>> knn_g = dgl.segmented_knn_graph(x, 2, segs)
    >>> knn_g.edges()
    (tensor([0, 0, 1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6]),
     tensor([0, 1, 0, 1, 2, 2, 3, 5, 4, 6, 3, 5, 4, 6]))
    """
    n_total_points, _ = F.shape(x)
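    # Prefix sums of the segment sizes give the global node index offset of each point set.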
    offset = np.insert(np.cumsum(segs), 0, 0)

    h_list = F.split(x, segs, 0)
    dst = [
        F.argtopk(pairwise_squared_distance(h_g), k, 1, descending=False) +
        int(offset[i])
        for i, h_g in enumerate(h_list)]
    dst = F.cat(dst, 0)
    src = F.arange(0, n_total_points).unsqueeze(1).expand(n_total_points, k)

    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))
    adj = sparse.csr_matrix((F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    return convert.from_scipy(adj)

def to_bidirected(g, copy_ndata=False, readonly=None):
    r"""Convert the graph to a bi-directional simple graph and return.

    For an input graph :math:`G`, return a new graph :math:`G'` such that an edge
    :math:`(u, v)\in G'` if and only if there exists an edge :math:`(u, v)\in G` or
    an edge :math:`(v, u)\in G`. The resulting graph :math:`G'` is a simple graph,
    meaning there is no parallel edge.

    The operation only works for edges whose two endpoints belong to the same node type.
    DGL will raise an error if the input graph is heterogeneous and contains edges
    with different types of endpoints.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the bidirected graph are copied from the
        original graph. If False, the bidirected graph will not have any node features.
        (Default: False)
    readonly : bool
        **DEPRECATED**.

    Returns
    -------
    DGLGraph
        The bidirected graph

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs.

    Examples
    --------
    The following examples use PyTorch backend.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges()
    (tensor([0, 1, 2, 1, 2, 0]), tensor([1, 2, 0, 0, 1, 2]))

    The graph already has both edges i->j and j->i.

    >>> g = dgl.graph((th.tensor([0, 1, 2, 0]), th.tensor([1, 2, 0, 2])))
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges()
    (tensor([0, 1, 2, 1, 2, 0]), tensor([1, 2, 0, 0, 1, 2]))

    **Heterogeneous graphs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2]), th.tensor([1, 1, 2, 0])),
    ...     ('user', 'follows', 'user'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges(etype='wins')
    (tensor([0, 0, 1, 1, 2, 2]), tensor([1, 2, 0, 2, 0, 1]))
    >>> bg1.edges(etype='follows')
    (tensor([1, 1, 2]), tensor([1, 2, 1]))
    """
    if readonly is not None:
        dgl_warning("Parameter readonly is deprecated. " \
            "There will be no difference between readonly and non-readonly DGLGraph")

    for c_etype in g.canonical_etypes:
        if c_etype[0] != c_etype[2]:
            assert False, "to_bidirected is not well defined for " \
                "unidirectional bipartite graphs" \
                ", but {} is unidirectional bipartite".format(c_etype)

    assert g.is_multigraph is False, "to_bidirected only supports simple graphs"

    g = add_reverse_edges(g, copy_ndata=copy_ndata, copy_edata=False)
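    # Coalescing the parallel edges afterwards yields a simple bidirected graph.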
    g = to_simple(g, return_counts=None, copy_ndata=copy_ndata, copy_edata=False)
    return g

def add_reverse_edges(g, readonly=None, copy_ndata=True,
                      copy_edata=False, ignore_bipartite=False):
    r"""Add a reverse edge for each edge in the input graph and return a new graph.

    For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
    function creates a new graph with edges
    :math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.

    The operation only works for edges whose two endpoints belong to the same node type.
    DGL will raise an error if the input graph is heterogeneous and contains edges
    with different types of endpoints. If :attr:`ignore_bipartite` is true, DGL will
    ignore those edges instead.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    readonly : bool, optional
        **DEPRECATED**. There will be no difference between readonly and non-readonly graphs.
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from
        the original graph. If False, the new graph will not have any
        node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the features of the reversed edges will be identical to
        the original ones.

        If False, the new graph will not have any edge features.

        (Default: False)
    ignore_bipartite: bool, optional
        If True, unidirectional bipartite graphs are ignored and
        no error is raised. If False, an error will be raised if
        an edge type of the input heterogeneous graph is for a unidirectional
        bipartite graph.

    Returns
    -------
    DGLGraph
        The graph with reversed edges added.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs. On the contrary, the two graphs do not share
    the same edge feature storage.

    Examples
    --------
    **Homogeneous graphs**

    >>> g = dgl.graph((th.tensor([0, 0]), th.tensor([0, 1])))
    >>> bg1 = dgl.add_reverse_edges(g)
    >>> bg1.edges()
    (tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))

    **Heterogeneous graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1])),
    ...     ('user', 'follows', 'user'): (th.tensor([1, 2, 1]), th.tensor([0, 0, 0]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['wins'].data['h'] = th.tensor([0, 1, 2, 3, 4])

    The :func:`add_reverse_edges` operation is applied to the edge type
    ``('user', 'wins', 'user')`` and the edge type ``('user', 'follows', 'user')``.
    The edge type ``('user', 'plays', 'game')`` is ignored.  Both the node features and
    edge features are shared.

    >>> bg = dgl.add_reverse_edges(g, copy_ndata=True,
                               copy_edata=True, ignore_bipartite=True)
    >>> bg.edges(etype=('user', 'wins', 'user'))
    (tensor([0, 2, 0, 2, 2, 1, 1, 2, 1, 0]), tensor([1, 1, 2, 1, 0, 0, 2, 0, 2, 2]))
    >>> bg.edges(etype=('user', 'follows', 'user'))
    (tensor([1, 2, 1, 0, 0, 0]), tensor([0, 0, 0, 1, 2, 1]))
    >>> bg.edges(etype=('user', 'plays', 'game'))
    (tensor([1, 2, 1]), tensor([2, 1, 1]))
    >>> bg.nodes['game'].data['hv']
    tensor([[1.],
            [1.],
            [1.]])
    >>> bg.edges[('user', 'wins', 'user')].data['h']
    tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
    """
    if readonly is not None:
        dgl_warning("Parameter readonly is deprecated. " \
            "There will be no difference between readonly and non-readonly DGLGraph")

    # get the node count for each ntype
    canonical_etypes = g.canonical_etypes
    num_nodes_dict = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes}
    # fast path
    if ignore_bipartite is False:
        subgs = {}
        for c_etype in canonical_etypes:
            if c_etype[0] != c_etype[2]:
                assert False, "add_reverse_edges is not well defined for " \
                    "unidirectional bipartite graphs" \
                    ", but {} is unidirectional bipartite".format(c_etype)

            u, v = g.edges(form='uv', order='eid', etype=c_etype)
            subgs[c_etype] = (F.cat([u, v], dim=0), F.cat([v, u], dim=0))

        new_g = convert.heterograph(subgs, num_nodes_dict=num_nodes_dict)
    else:
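        # ignore_bipartite=True: keep unidirectional bipartite relations unchanged and only add
        # reverse edges for relations whose endpoints share a node type.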
        subgs = {}
        for c_etype in canonical_etypes:
            if c_etype[0] != c_etype[2]:
                u, v = g.edges(form='uv', order='eid', etype=c_etype)
                subgs[c_etype] = (u, v)
            else:
                u, v = g.edges(form='uv', order='eid', etype=c_etype)
                subgs[c_etype] = (F.cat([u, v], dim=0), F.cat([v, u], dim=0))

        new_g = convert.heterograph(subgs, num_nodes_dict=num_nodes_dict)

    # handle features
    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(new_g, node_frames=node_frames)

    if copy_edata:
        # find the original edge IDs whose features each (reverse) edge should copy
        eids = []
        for c_etype in canonical_etypes:
            eid = F.copy_to(F.arange(0, g.number_of_edges(c_etype)), new_g.device)
            if c_etype[0] != c_etype[2]:
                eids.append(eid)
            else:
                eids.append(F.cat([eid, eid], 0))

        edge_frames = utils.extract_edge_subframes(g, eids)
        utils.set_new_frames(new_g, edge_frames=edge_frames)

    return new_g

def line_graph(g, backtracking=True, shared=False):
    """Return the line graph of this graph.

    The line graph ``L(G)`` of a given graph ``G`` is defined as another graph where
    the nodes in ``L(G)`` map to the edges in ``G``.  For any pair of edges ``(u, v)``
    and ``(v, w)`` in ``G``, the corresponding node of edge ``(u, v)`` in ``L(G)`` will
    have an edge connecting to the corresponding node of edge ``(v, w)``.

    Parameters
    ----------
    g : DGLGraph
        Input graph.  Must be homogeneous.
    backtracking : bool, optional
        If False, the line graph node corresponding to edge ``(u, v)`` will not have
        an edge connecting to the line graph node corresponding to edge ``(v, u)``.

        Default: True.
    shared : bool, optional
        Whether to copy the edge features of the original graph as the node features
        of the result line graph.

    Returns
    -------
    G : DGLGraph
        The line graph of this graph.

    Notes
    -----
    * If :attr:`shared` is True, the node features of the resulting graph share the same
      storage with the edge features of the input graph. Hence, users should try to
      avoid in-place operations which will be visible to both graphs.

    * The function supports input graph on GPU but copies it to CPU during computation.

    Examples
    --------
    Assume that the graph has the following adjacency matrix: ::

       A = [[0, 0, 1],
            [1, 0, 1],
            [1, 1, 0]]

    >>> g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 'user', 'follows')
    >>> lg = g.line_graph()
    >>> lg
    Graph(num_nodes=5, num_edges=8,
    ndata_schemes={}
    edata_schemes={})
    >>> lg.edges()
    (tensor([0, 0, 1, 2, 2, 3, 4, 4]), tensor([3, 4, 0, 3, 4, 0, 1, 2]))
    >>> lg = g.line_graph(backtracking=False)
    >>> lg
    Graph(num_nodes=5, num_edges=4,
    ndata_schemes={}
    edata_schemes={})
    >>> lg.edges()
    (tensor([0, 1, 2, 4]), tensor([4, 0, 3, 1]))
    """
    assert g.is_homogeneous, \
        'only homogeneous graph is supported'

    dev = g.device
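    # The line graph is computed by the C API on CPU; move the result back to the input device.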
    lg = DGLHeteroGraph(_CAPI_DGLHeteroLineGraph(g._graph.copy_to(nd.cpu()), backtracking))
    lg = lg.to(dev)
    if shared:
        new_frames = utils.extract_edge_subframes(g, None)
        utils.set_new_frames(lg, node_frames=new_frames)

    return lg

DGLHeteroGraph.line_graph = utils.alias_func(line_graph)

def khop_adj(g, k):
    """Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of the graph
    :math:`g`.

    The returned matrix is a 32-bit float dense matrix on CPU. The graph must be homogeneous.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    k : int
        The :math:`k` in :math:`A^k`.

    Returns
    -------
    Tensor
        The returned tensor.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0]))
    >>> dgl.khop_adj(g, 1)
    tensor([[1., 0., 0., 0., 1.],
            [1., 1., 0., 0., 0.],
            [0., 1., 1., 0., 0.],
            [0., 0., 1., 1., 0.],
            [0., 0., 0., 1., 1.]])
    >>> dgl.khop_adj(g, 3)
    tensor([[1., 0., 1., 3., 3.],
            [3., 1., 0., 1., 3.],
            [3., 3., 1., 0., 1.],
            [1., 3., 3., 1., 0.],
            [0., 1., 3., 3., 1.]])
    """
    assert g.is_homogeneous, \
        'only homogeneous graph is supported'
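    # Raise the sparse adjacency matrix (in the format the graph was created with) to the k-th power.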
    adj_k = g.adj(scipy_fmt=g.formats()['created'][0]) ** k
    return F.tensor(adj_k.todense().astype(np.float32))

def khop_graph(g, k, copy_ndata=True):
    """Return the graph whose edges connect the :attr:`k`-hop neighbors of the original graph.

    More specifically, an edge from node ``u`` and node ``v`` exists in the new graph if
    and only if a path with length :attr:`k` exists from node ``u`` to node ``v`` in the
    original graph.

    The adjacency matrix of the returned graph is :math:`A^k`
    (where :math:`A` is the adjacency matrix of :math:`g`).

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    k : int
        The :math:`k` in `k`-hop graph.
    copy_ndata: bool, optional
        If True, the node features of the new graph are copied from the
        original graph.

        If False, the new graph will not have any node features.

        (Default: True)

    Returns
    -------
    DGLGraph
        The returned graph.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs.

    Examples
    --------

    Below is a simple example:

    >>> import dgl
    >>> g = dgl.graph(([0, 1], [1, 2]))
    >>> g_2 = dgl.transform.khop_graph(g, 2)
    >>> print(g_2.edges())
    (tensor([0]), tensor([2]))

    A more complicated example:

    >>> import dgl
    >>> g = dgl.graph(([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0]))
    >>> dgl.khop_graph(g, 1)
    DGLGraph(num_nodes=5, num_edges=10,
             ndata_schemes={}
             edata_schemes={})
    >>> dgl.khop_graph(g, 3)
    DGLGraph(num_nodes=5, num_edges=40,
             ndata_schemes={}
             edata_schemes={})
    """
    assert g.is_homogeneous, \
        'only homogeneous graph is supported'
    n = g.number_of_nodes()
    adj_k = g.adj(transpose=True, scipy_fmt=g.formats()['created'][0]) ** k
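    # Each nonzero entry of A^k counts the k-hop paths between a pair of nodes; repeat the
    # corresponding (row, col) pair that many times so the result keeps parallel edges.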
    adj_k = adj_k.tocoo()
    multiplicity = adj_k.data
    row = np.repeat(adj_k.row, multiplicity)
    col = np.repeat(adj_k.col, multiplicity)
    # TODO(zihao): we should support creating multi-graph from scipy sparse matrix
    # in the future.
    new_g = convert.graph((row, col), num_nodes=n)

    # handle ndata
    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(new_g, node_frames=node_frames)

    return new_g

def reverse(g, copy_ndata=True, copy_edata=False, *, share_ndata=None, share_edata=None):
    r"""Return a new graph with the edges of the input graph reversed.

    The reverse (also called converse, transpose) of a graph with edges
    :math:`(i_1, j_1), (i_2, j_2), \cdots` of type ``(U, E, V)`` is a new graph with edges
    :math:`(j_1, i_1), (j_2, i_2), \cdots` of type ``(V, E, U)``.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the reversed graph are copied from the
        original graph. If False, the reversed graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the reversed graph are copied from the
        original graph. If False, the reversed graph will not have any edge features.
        (Default: False)

    Return
    ------
    DGLGraph
        The reversed graph.

    Notes
    -----
    If :attr:`copy_ndata` or :attr:`copy_edata` is True,
    the resulting graph will share the node or edge feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs.

    Examples
    --------
    **Homogeneous graphs**

    Create a graph to reverse.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.]])

    Reverse the graph.

    >>> rg = dgl.reverse(g, copy_edata=True)
    >>> rg.ndata['h']
    tensor([[0.],
            [1.],
            [2.]])

    The i-th edge in the reversed graph corresponds to the i-th edge in the
    original graph. When :attr:`copy_edata` is True, they have the same features.

    >>> rg.edges()
    (tensor([1, 2, 0]), tensor([0, 1, 2]))
    >>> rg.edata['h']
    tensor([[3.],
            [4.],
            [5.]])

    **Heterogeneous graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The resulting graph will have edge types
    ``('user', 'follows', 'user')`` and ``('game', 'plays', 'user')``.

    >>> rg = dgl.reverse(g, copy_ndata=True)
    >>> rg
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'follows', 'user'): 2, ('game', 'plays', 'user'): 3},
          metagraph=[('user', 'user'), ('game', 'user')])
    >>> rg.edges(etype='follows')
    (tensor([1, 2]), tensor([0, 2]))
    >>> rg.edges(etype='plays')
    (tensor([2, 1, 1]), tensor([1, 2, 1]))
    >>> rg.nodes['game'].data['hv']
    tensor([[1.],
            [1.],
            [1.]])
    >>> rg.edges['plays'].data
    {}
    """
    if share_ndata is not None:
        dgl_warning('share_ndata argument has been renamed to copy_ndata.')
        copy_ndata = share_ndata
    if share_edata is not None:
        dgl_warning('share_edata argument has been renamed to copy_edata.')
        copy_edata = share_edata
    if g.is_block:
        # TODO(0.5 release, xiangsx) need to handle BLOCK
        # currently reversing a block results in undefined behavior
        raise DGLError('Reversing a block graph is not supported.')
    gidx = g._graph.reverse()
    new_g = DGLHeteroGraph(gidx, g.ntypes, g.etypes)

    # handle ndata
    if copy_ndata:
        # for each ntype
        for ntype in g.ntypes:
            new_g.nodes[ntype].data.update(g.nodes[ntype].data)

    # handle edata
    if copy_edata:
        # for each etype
        for utype, etype, vtype in g.canonical_etypes:
            new_g.edges[vtype, etype, utype].data.update(
                g.edges[utype, etype, vtype].data)

    return new_g

DGLHeteroGraph.reverse = utils.alias_func(reverse)

def to_simple_graph(g):
    """Convert the graph to a simple graph with no multi-edge.

    DEPRECATED: renamed to dgl.to_simple

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    DGLGraph
        A simple graph.
    """
    dgl_warning('dgl.to_simple_graph is renamed to dgl.to_simple in v0.5.')
    return to_simple(g)

def to_bidirected_stale(g, readonly=True):
    """NOTE: this function only works on the deprecated
    :class:`dgl.DGLGraphStale` object.

    Convert the graph to a bidirected graph.

    The function generates a new graph with no node/edge feature.
    If g has an edge for ``(u, v)`` but no edge for ``(v, u)``, then the
    returned graph will have both ``(u, v)`` and ``(v, u)``.

    If the input graph is a multigraph (there are multiple edges from node u to node v),
    the returned graph isn't well defined.

    Parameters
    ----------
    g : DGLGraphStale
        The input graph.
    readonly : bool
        Whether the returned bidirected graph is readonly or not.

        (Default: True)

    Notes
    -----
    Please make sure g is a simple graph, otherwise the return value is undefined.

    Returns
    -------
    DGLGraph

    Examples
    --------
    The following example uses PyTorch backend.

    >>> g = dgl._deprecate.graph.DGLGraph()
    >>> g.add_nodes(2)
    >>> g.add_edges([0, 0], [0, 1])
    >>> bg1 = dgl.to_bidirected_stale(g)
    >>> bg1.edges()
    (tensor([0, 1, 0]), tensor([0, 0, 1]))
    """
    if readonly:
        newgidx = _CAPI_DGLToBidirectedImmutableGraph(g._graph)
    else:
        newgidx = _CAPI_DGLToBidirectedMutableGraph(g._graph)
    return DGLGraphStale(newgidx)

def laplacian_lambda_max(g):
    """Return the largest eigenvalue of the normalized symmetric Laplacian of a graph.

    If the graph is batched from multiple graphs, return the list of the largest eigenvalue
    for each graph instead.

    Parameters
    ----------
    g : DGLGraph
        The input graph, it must be a bi-directed homogeneous graph, i.e., every edge
        should have an accompanied reverse edge in the graph.
        The graph can be batched from multiple graphs.

    Returns
    -------
    list[float]
        A list where the i-th item indicates the largest eigenvalue
        of i-th graph in :attr:`g`.

        In the case where the function takes a single graph, it will return a list
        consisting of a single element.

    Examples
    --------
    >>> import dgl
    >>> g = dgl.graph(([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], [1, 2, 3, 4, 0, 4, 0, 1, 2, 3]))
    >>> dgl.laplacian_lambda_max(g)
    [1.809016994374948]
    """
    g_arr = batch.unbatch(g)
    rst = []
    for g_i in g_arr:
        n = g_i.number_of_nodes()
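        # Normalized symmetric Laplacian: L = I - D^{-1/2} A D^{-1/2}; take its largest-magnitude eigenvalue.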
        adj = g_i.adj(scipy_fmt=g_i.formats()['created'][0]).astype(float)
        norm = sparse.diags(F.asnumpy(g_i.in_degrees()).clip(1) ** -0.5, dtype=float)
        laplacian = sparse.eye(n) - norm * adj * norm
        rst.append(sparse.linalg.eigs(laplacian, 1, which='LM',
                                      return_eigenvectors=False)[0].real)
    return rst

def metapath_reachable_graph(g, metapath):
    """Return a graph where the successors of any node ``u`` are nodes reachable from ``u`` by
    the given metapath.

    If the beginning node type ``s`` and ending node type ``t`` are the same, it will return
    a homogeneous graph with node type ``s = t``.  Otherwise, a unidirectional bipartite graph
    with source node type ``s`` and destination node type ``t`` is returned.

    In both cases, two nodes ``u`` and ``v`` will be connected with an edge ``(u, v)`` if
    there exists one path matching the metapath from ``u`` to ``v``.

    The result graph keeps the node set of type ``s`` and ``t`` in the original graph even if
    they might have no neighbor.

    The features of the source/destination node type in the original graph would be copied to
    the new graph.

    Parameters
    ----------
    g : DGLGraph
        The input graph
    metapath : list[str or tuple of str]
        Metapath in the form of a list of edge types

    Returns
    -------
    DGLGraph
        A homogeneous or unidirectional bipartite graph. It will be on CPU regardless of
        whether the input graph is on CPU or GPU.

    Examples
    --------
    >>> g = dgl.heterograph({
    ...     ('A', 'AB', 'B'): ([0, 1, 2], [1, 2, 3]),
    ...     ('B', 'BA', 'A'): ([1, 2, 3], [0, 1, 2])})
    >>> new_g = dgl.metapath_reachable_graph(g, ['AB', 'BA'])
    >>> new_g.edges(order='eid')
    (tensor([0, 1, 2]), tensor([0, 1, 2]))
    """
    adj = 1
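    # Chain-multiply the per-relation adjacency matrices along the metapath; a nonzero entry in
    # the product means at least one path matching the metapath connects the two endpoints.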
    for etype in metapath:
        adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=True)

    adj = (adj != 0).tocsr()
    srctype = g.to_canonical_etype(metapath[0])[0]
    dsttype = g.to_canonical_etype(metapath[-1])[2]
    new_g = convert.heterograph({(srctype, '_E', dsttype): adj.nonzero()},
                                {srctype: adj.shape[0], dsttype: adj.shape[1]},
                                idtype=g.idtype, device=g.device)

    # copy srcnode features
    new_g.nodes[srctype].data.update(g.nodes[srctype].data)
    # copy dstnode features
    if srctype != dsttype:
        new_g.nodes[dsttype].data.update(g.nodes[dsttype].data)

    return new_g

def add_nodes(g, num, data=None, ntype=None):
    r"""Add the given number of nodes to the graph and return a new graph.

    The new nodes will have IDs starting from ``g.num_nodes(ntype)``.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    num : int
        The number of nodes to add.
    data : dict[str, Tensor], optional
        Feature data of the added nodes. The keys are feature names
        while the values are feature data.
    ntype : str, optional
        The node type name. Can be omitted if there is
        only one type of nodes in the graph.

    Return
    ------
    DGLGraph
        The graph with newly added nodes.

    Notes
    -----
    * For features in :attr:`g` but not in :attr:`data`,
      DGL assigns zero features for the newly added nodes.
    * For feature in :attr:`data` but not in :attr:`g`, DGL assigns zero features
      for the existing nodes in the graph.

    Examples
    --------

    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_nodes()
    3
    >>> g = dgl.add_nodes(g, 2)
    >>> g.num_nodes()
    5

    If the graph has some node features and new nodes are added without
    features, their features will be filled with zeros.

    >>> g.ndata['h'] = torch.ones(5, 1)
    >>> g = dgl.add_nodes(g, 1)
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.]])

    Assign features for the new nodes.

    >>> g = dgl.add_nodes(g, 1, {'h': torch.ones(1, 1), 'w': torch.ones(1, 1)})
    >>> g.ndata['h']
    tensor([[1.], [1.], [1.], [1.], [1.], [0.], [1.]])

    Since :attr:`data` contains new feature fields, the features for existing nodes
    will be filled with zeros.

    >>> g.ndata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [0.], [1.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.num_nodes('user')
    3
    >>> g = dgl.add_nodes(g, 2, ntype='user')
    >>> g.num_nodes('user')
    5

    See Also
    --------
    remove_nodes
    add_edges
    remove_edges
    """
    g = g.clone()
    g.add_nodes(num, data=data, ntype=ntype)
    return g

def add_edges(g, u, v, data=None, etype=None):
    r"""Add the edges to the graph and return a new graph.

    The i-th new edge will be from ``u[i]`` to ``v[i]``.  The IDs of the new
    edges will start from ``g.num_edges(etype)``.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    u : int, Tensor or iterable[int]
        Source node IDs, ``u[i]`` gives the source node for the i-th new edge.
    v : int, Tensor or iterable[int]
        Destination node IDs, ``v[i]`` gives the destination node for the i-th new edge.
    data : dict[str, Tensor], optional
        Feature data of the added edges. The keys are feature names
        while the values are feature data.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Return
    ------
    DGLGraph
        The graph with newly added edges.

    Notes
    -----
    * If the end nodes of the given edges do not exist in :attr:`g`,
      :func:`dgl.add_nodes` is invoked to add those nodes.
      The node features of the new nodes will be filled with zeros.
    * For features in :attr:`g` but not in :attr:`data`,
      DGL assigns zero features for the newly added nodes.
    * For feature in :attr:`data` but not in :attr:`g`, DGL assigns zero features
      for the existing nodes in the graph.

    Examples
    --------
    The following example uses PyTorch backend.

    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 1]), torch.tensor([1, 2])))
    >>> g.num_edges()
    2
    >>> g = dgl.add_edges(g, torch.tensor([1, 3]), torch.tensor([0, 1]))
    >>> g.num_edges()
    4

    Since ``u`` or ``v`` contains a non-existing node ID, the nodes are
    added implicitly.

    >>> g.num_nodes()
    4

    If the graph has some edge features and new edges are added without
    features, their features will be filled with zeros.

    >>> g.edata['h'] = torch.ones(4, 1)
    >>> g = dgl.add_edges(g, torch.tensor([1]), torch.tensor([1]))
    >>> g.edata['h']
    tensor([[1.], [1.], [1.], [1.], [0.]])

    You can also assign features for the new edges in adding new edges.

    >>> g = dgl.add_edges(g, torch.tensor([0, 0]), torch.tensor([2, 2]),
    ...                   {'h': torch.tensor([[1.], [2.]]), 'w': torch.ones(2, 1)})
    >>> g.edata['h']
    tensor([[1.], [1.], [1.], [1.], [0.], [1.], [2.]])

    Since :attr:`data` contains new feature fields, the features for old edges
    will be filled with zeros.

    >>> g.edata['w']
    tensor([[0.], [0.], [0.], [0.], [0.], [1.], [1.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g.number_of_edges('plays')
    4
    >>> g = dgl.add_edges(g, torch.tensor([3]), torch.tensor([3]), etype='plays')
    >>> g.number_of_edges('plays')
    5

    See Also
    --------
    add_nodes
    remove_nodes
    remove_edges
    """
    g = g.clone()
    g.add_edges(u, v, data=data, etype=etype)
    return g

def remove_edges(g, eids, etype=None, store_ids=False):
    r"""Remove the specified edges and return a new graph.

    Also delete the features of the edges. The edges must exist in the graph.
    The resulting graph has the same number of the nodes as the input one,
    even if some nodes become isolated after the edge removal.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    eids : int, Tensor, iterable[int]
        The IDs of the edges to remove.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted nodes and edges in the ``ndata``
        and ``edata`` of the resulting graph under name ``dgl.NID`` and ``dgl.EID``,
        respectively.

    Return
    ------
    DGLGraph
        The graph with edges deleted.

    Examples
    --------
    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g = dgl.remove_edges(g, torch.tensor([0, 1]))
    >>> g
    Graph(num_nodes=3, num_edges=1,
        ndata_schemes={}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.edges('all')
    (tensor([2]), tensor([2]), tensor([0]))
    >>> g.edata['he']
    tensor([[2.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g = dgl.remove_edges(g, torch.tensor([0, 1]), 'plays')
    >>> g.edges('all', etype='plays')
    (tensor([0, 1]), tensor([0, 0]), tensor([0, 1]))

    See Also
    --------
    add_nodes
    add_edges
    remove_nodes
    """
    g = g.clone()
    g.remove_edges(eids, etype=etype, store_ids=store_ids)
    return g


def remove_nodes(g, nids, ntype=None, store_ids=False):
    r"""Remove the specified nodes and return a new graph.

    Also delete the features. Edges that connect from/to the nodes will be
    removed as well. After the removal, DGL re-labels the remaining nodes and edges
    with IDs from 0.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    nids : int, Tensor, iterable[int]
        The nodes to be removed.
    ntype : str, optional
        The type of the nodes to remove. Can be omitted if there is
        only one node type in the graph.
    store_ids : bool, optional
        If True, it will store the raw IDs of the extracted nodes and edges in the ``ndata``
        and ``edata`` of the resulting graph under name ``dgl.NID`` and ``dgl.EID``,
        respectively.

    Return
    ------
    DGLGraph
        The graph with nodes deleted.

    Examples
    --------

    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([0, 1, 2])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g = dgl.remove_nodes(g, torch.tensor([0, 1]))
    >>> g
    Graph(num_nodes=1, num_edges=1,
        ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.ndata['hv']
    tensor([[2.]])
    >>> g.edata['he']
    tensor([[2.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1, 1, 2]),
    ...                                 torch.tensor([0, 0, 1, 1])),
    ...     ('developer', 'develops', 'game'): (torch.tensor([0, 1]),
    ...                                         torch.tensor([0, 1]))
    ...     })
    >>> g = dgl.remove_nodes(g, torch.tensor([0, 1]), ntype='game')
    >>> g.num_nodes('user')
    3
    >>> g.num_nodes('game')
    0
    >>> g.num_edges('plays')
    0

    See Also
    --------
    add_nodes
    add_edges
    remove_edges
    """
    g = g.clone()
    g.remove_nodes(nids, ntype=ntype, store_ids=store_ids)
    return g

def add_self_loop(g, etype=None):
    r"""Add self-loops for each node in the graph and return a new graph.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Return
    ------
    DGLGraph
        The graph with self-loops.

    Notes
    -----
    * The function only supports homogeneous graphs or heterogeneous graphs but
      the relation graph specified by the :attr:`etype` argument is homogeneous.
    * The function adds self-loops regardless of whether they already exist or not.
      If one wishes to have exactly one self-loop for every node,
      call :func:`remove_self_loop` before invoking :func:`add_self_loop`.
    * Features of the new edges (self-loop edges) will be filled with zeros.

    Examples
    --------
    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 2]), torch.tensor([2, 1, 0])))
    >>> g.ndata['hv'] = torch.arange(3).float().reshape(-1, 1)
    >>> g.edata['he'] = torch.arange(3).float().reshape(-1, 1)
    >>> g = dgl.add_self_loop(g)
    >>> g
    Graph(num_nodes=3, num_edges=6,
        ndata_schemes={'hv': Scheme(shape=(1,), dtype=torch.float32)}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.edata['he']
    tensor([[0.],
            [1.],
            [2.],
            [0.],
            [0.],
            [0.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([1, 2]),
    ...                                   torch.tensor([0, 1])),
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
    ...                                 torch.tensor([0, 1]))})
    >>> g = dgl.add_self_loop(g, etype='follows')
    >>> g
    Graph(num_nodes={'user': 3, 'game': 2},
        num_edges={('user', 'plays', 'game'): 2, ('user', 'follows', 'user'): 5},
        metagraph=[('user', 'user'), ('user', 'game')])
    """
    etype = g.to_canonical_etype(etype)
    if etype[0] != etype[2]:
        raise DGLError(
            'add_self_loop does not support unidirectional bipartite graphs: {}.' \
            'Please make sure the types of head node and tail node are identical.' \
            ''.format(etype))
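    # Append one (i, i) edge for every node of the shared endpoint type.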
    nodes = g.nodes(etype[0])
    new_g = add_edges(g, nodes, nodes, etype=etype)
    return new_g

DGLHeteroGraph.add_self_loop = utils.alias_func(add_self_loop)

def remove_self_loop(g, etype=None):
    r""" Remove self-loops for each node in the graph and return a new graph.

    Parameters
    ----------
    g : DGLGraph
        The graph.
    etype : str or (str, str, str), optional
        The type names of the edges. The allowed type name formats are:

        * ``(str, str, str)`` for source node type, edge type and destination node type.
        * or one ``str`` edge type name if the name can uniquely identify a
          triplet format in the graph.

        Can be omitted if the graph has only one type of edges.

    Notes
    -----
    If a node has multiple self-loops, remove them all. Do nothing for nodes without
    self-loops.

    Examples
    ---------

    >>> import dgl
    >>> import torch

    **Homogeneous Graphs**

    >>> g = dgl.graph((torch.tensor([0, 0, 0, 1]), torch.tensor([1, 0, 0, 2])))
    >>> g.edata['he'] = torch.arange(4).float().reshape(-1, 1)
    >>> g = dgl.remove_self_loop(g)
    >>> g
    Graph(num_nodes=3, num_edges=2,
        ndata_schemes={}
        edata_schemes={'he': Scheme(shape=(1,), dtype=torch.float32)})
    >>> g.edata['he']
    tensor([[0.],[3.]])

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): (torch.tensor([0, 1, 1, 1, 2]),
    ...                                   torch.tensor([0, 0, 1, 1, 1])),
    ...     ('user', 'plays', 'game'): (torch.tensor([0, 1]),
    ...                                 torch.tensor([0, 1]))
    ...     })
    >>> g = dgl.remove_self_loop(g, etype='follows')
    >>> g.num_nodes('user')
    3
    >>> g.num_nodes('game')
    2
    >>> g.num_edges('follows')
    2
    >>> g.num_edges('plays')
    2

    See Also
    --------
    add_self_loop
    """
    etype = g.to_canonical_etype(etype)
    if etype[0] != etype[2]:
        raise DGLError(
            'remove_self_loop does not support unidirectional bipartite graphs: {}.' \
            'Please make sure the types of head node and tail node are identical.' \
            ''.format(etype))
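    # Self-loop edges are exactly those whose source and destination node IDs coincide.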
    u, v = g.edges(form='uv', order='eid', etype=etype)
    self_loop_eids = F.tensor(F.nonzero_1d(u == v), dtype=F.dtype(u))
    new_g = remove_edges(g, self_loop_eids, etype=etype)
    return new_g

DGLHeteroGraph.remove_self_loop = utils.alias_func(remove_self_loop)

def compact_graphs(graphs, always_preserve=None, copy_ndata=True, copy_edata=True):
    """Given a list of graphs with the same set of nodes, find and eliminate the common
    isolated nodes across all graphs.

    This function requires the graphs to have the same set of nodes (i.e. the node types
    must be the same, and the number of nodes of each node type must be the same).  The
    metagraph does not have to be the same.

    It finds all the nodes that have zero in-degree and zero out-degree in all the given
    graphs, and eliminates them from all the graphs.

    Useful for graph sampling where you have a giant graph but you only wish to perform
    message passing on a smaller graph with a (tiny) subset of nodes.

    Parameters
    ----------
    graphs : DGLGraph or list[DGLGraph]
        The graph, or list of graphs.

        All graphs must be on CPU.

        All graphs must have the same set of nodes.
    always_preserve : Tensor or dict[str, Tensor], optional
        If a dict of node types and node ID tensors is given, the nodes of given
        node types would not be removed, regardless of whether they are isolated.

        If a Tensor is given, DGL assumes that all the graphs have one (same) node type.
    copy_ndata: bool, optional
        If True, the node features of the returned graphs are copied from the
        original graphs.

        If False, the returned graphs will not have any node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the returned graphs are copied from the
        original graphs.

        If False, the returned graphs will not have any edge features.

        (Default: True)

    Returns
    -------
    DGLGraph or list[DGLGraph]
        The compacted graph or list of compacted graphs.

        Each returned graph would have a feature ``dgl.NID`` containing the mapping
        of node IDs for each type from the compacted graph(s) to the original graph(s).
        Note that the mapping is the same for all the compacted graphs.

        All the returned graphs are on CPU.

    Notes
    -----
    This function currently requires that the same node type of all graphs should have
    the same node type ID, i.e. the node types are *ordered* the same.

    If :attr:`copy_edata` is True, the resulting graph will share the edge feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs.

    Examples
    --------
    The following code constructs a bipartite graph with 20 users and 10 games, but
    only user #1 and #3, as well as game #3 and #5, have connections:

    >>> g = dgl.heterograph({('user', 'plays', 'game'): ([1, 3], [3, 5])},
    ...                      {'user': 20, 'game': 10})

    The following would compact the graph above to another bipartite graph with only
    two users and two games.

    >>> new_g = dgl.compact_graphs(g)
    >>> new_g.nodes['user'].data[dgl.NID]
    tensor([1, 3])
    >>> new_g.nodes['game'].data[dgl.NID]
    tensor([3, 5])

    The mapping tells us that only user #1 and #3 as well as game #3 and #5 are kept.
    Furthermore, the first and second users in the compacted graph map to
    user #1 and #3 in the original graph.  Games are similar.

    One can verify that the edge connections are kept the same in the compacted graph.

    >>> new_g.edges(form='all', order='eid', etype='plays')
    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))

    When compacting multiple graphs, nodes that do not have any connections in any
    of the given graphs are removed.  So if you compact ``g`` and the following graph
    ``g2`` together:

    >>> g2 = dgl.heterograph({('user', 'plays', 'game'): ([1, 6], [6, 8])},
    ...                      {'user': 20, 'game': 10})
    >>> new_g, new_g2 = dgl.compact_graphs([g, g2])
    >>> new_g.nodes['user'].data[dgl.NID]
    tensor([1, 3, 6])
    >>> new_g.nodes['game'].data[dgl.NID]
    tensor([3, 5, 6, 8])

    Then one can see that user #1 from both graphs, user #3 from the first graph, as
    well as user #6 from the second graph, are kept.  Games are similar.

    Similarly, one can also verify the connections:

    >>> new_g.edges(form='all', order='eid', etype='plays')
    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> new_g2.edges(form='all', order='eid', etype='plays')
    (tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
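
    Isolated nodes can also be kept on purpose via :attr:`always_preserve`.  The
    following is a minimal sketch assuming the PyTorch backend (the exact ordering
    of the induced IDs may differ):

    >>> new_g = dgl.compact_graphs(g, always_preserve={'game': torch.tensor([4])})
    >>> 4 in new_g.nodes['game'].data[dgl.NID]   # game #4 is kept even though isolated
    True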
    """
    return_single = False
    if not isinstance(graphs, Iterable):
        graphs = [graphs]
        return_single = True
    if len(graphs) == 0:
        return []
    if graphs[0].is_block:
        raise DGLError('Compacting a block graph is not allowed.')
    assert all(g.device == F.cpu() for g in graphs), 'all the graphs must be on CPU'

    # Ensure the node types are ordered the same.
    # TODO(BarclayII): we ideally need to remove this constraint.
    ntypes = graphs[0].ntypes
    idtype = graphs[0].idtype
    device = graphs[0].device
    for g in graphs:
        assert ntypes == g.ntypes, \
            ("All graphs should have the same node types in the same order, got %s and %s" %
             (ntypes, g.ntypes))
        assert idtype == g.idtype, "Expect graph data type to be {}, but got {}".format(
            idtype, g.idtype)
        assert device == g.device, "Expect graph device to be {}, but got {}".format(
            device, g.device)

    # Process the dictionary or tensor of "always preserve" nodes
    if always_preserve is None:
        always_preserve = {}
    elif not isinstance(always_preserve, Mapping):
        if len(ntypes) > 1:
            raise ValueError("Node type must be given if multiple node types exist.")
        always_preserve = {ntypes[0]: always_preserve}

    always_preserve = utils.prepare_tensor_dict(graphs[0], always_preserve, 'always_preserve')
    always_preserve_nd = []
    for ntype in ntypes:
        nodes = always_preserve.get(ntype, None)
        if nodes is None:
            nodes = F.copy_to(F.tensor([], idtype), device)
        always_preserve_nd.append(F.to_dgl_nd(nodes))

    # Compact and construct heterographs
    new_graph_indexes, induced_nodes = _CAPI_DGLCompactGraphs(
        [g._graph for g in graphs], always_preserve_nd)
    induced_nodes = [F.from_dgl_nd(nodes) for nodes in induced_nodes]

    new_graphs = [
        DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
        for new_graph_index, graph in zip(new_graph_indexes, graphs)]

    if copy_ndata:
        for g, new_g in zip(graphs, new_graphs):
            node_frames = utils.extract_node_subframes(g, induced_nodes)
            utils.set_new_frames(new_g, node_frames=node_frames)
    if copy_edata:
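        # Compacting only removes isolated nodes, so no edges are dropped and the
        # edge features can be carried over unchanged for every edge type.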
        for g, new_g in zip(graphs, new_graphs):
            edge_frames = utils.extract_edge_subframes(g, None)
            utils.set_new_frames(new_g, edge_frames=edge_frames)

    if return_single:
        new_graphs = new_graphs[0]

    return new_graphs

def to_block(g, dst_nodes=None, include_dst_in_src=True):
    """Convert a graph into a bipartite-structured *block* for message passing.

    A block is a graph consisting of two sets of nodes: the
    *input* nodes and *output* nodes.  The input and output nodes can have multiple
    node types.  All the edges connect from input nodes to output nodes.

    Specifically, the input nodes and output nodes will have the same node types as the
    ones in the original graph.  DGL maps each edge ``(u, v)`` with edge type
    ``(utype, etype, vtype)`` in the original graph to the edge with type
    ``etype`` connecting from node ID ``u`` of type ``utype`` in the input side to node
    ID ``v`` of type ``vtype`` in the output side.

    The output nodes of the block will only contain the nodes that have at least one
    inbound edge of any type.  The input nodes of the block will only contain the nodes
    that appear in the output nodes, as well as the nodes that have at least one outbound
    edge connecting to one of the output nodes.

    If the :attr:`dst_nodes` argument is not None, it specifies the output nodes instead.

    Parameters
    ----------
    g : DGLGraph
        The graph.  Must be on CPU.
    dst_nodes : Tensor or dict[str, Tensor], optional
        The list of output nodes.

        If a tensor is given, the graph must have only one node type.

        If given, it must be a superset of all the nodes that have at least one inbound
        edge.  An error will be raised otherwise.
    include_dst_in_src : bool
        If False, do not include output nodes in input nodes.

        (Default: True)

    Returns
    -------
    DGLBlock
        The new graph describing the block.

        The node IDs induced for each type in both sides would be stored in feature
        ``dgl.NID``.

        The edge IDs induced for each type would be stored in feature ``dgl.EID``.

    Raises
    ------
    DGLError
        If :attr:`dst_nodes` is specified but it is not a superset of all the nodes that
        have at least one inbound edge.

    Notes
    -----
    :func:`to_block` is most commonly used in customizing neighborhood sampling
    for stochastic training on a large graph.  Please refer to the user guide
    :ref:`guide-minibatch` for a more thorough discussion about the methodology
    of stochastic training.
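
    As a rough sketch of that use case (assuming the standard
    :func:`dgl.sampling.sample_neighbors` API; the names below are illustrative only),
    one sampling layer typically looks like:

    >>> seeds = torch.LongTensor([3, 2])                        # output nodes of this layer
    >>> frontier = dgl.sampling.sample_neighbors(g, seeds, 2)   # up to 2 in-edges per seed
    >>> block = dgl.to_block(frontier, seeds)                   # block for message passing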

    Examples
    --------
    Converting a homogeneous graph to a block as described above:
    >>> g = dgl.graph(([1, 2], [2, 3]))
    >>> block = dgl.to_block(g, torch.LongTensor([3, 2]))

    The output nodes would be exactly the same as the ones given: [3, 2].

    >>> induced_dst = block.dstdata[dgl.NID]
    >>> induced_dst
    tensor([3, 2])

    The first few input nodes would also be exactly the same as
    the ones given.  The rest of the nodes are the ones necessary for message passing
    into nodes 3, 2.  This means that node 1 would be included.

    >>> induced_src = block.srcdata[dgl.NID]
    >>> induced_src
    tensor([3, 2, 1])

    You can notice that the first two nodes are identical to the given nodes as well as
    the output nodes.

    The induced edges can also be obtained by the following:

    >>> block.edata[dgl.EID]
    tensor([1, 0])

    This indicates that edges (2, 3) and (1, 2) are included in the result graph.  You can
    verify that the first edge in the block indeed maps to the edge (2, 3), and the
    second edge in the block indeed maps to the edge (1, 2):

    >>> src, dst = block.edges(order='eid')
    >>> induced_src[src], induced_dst[dst]
    (tensor([2, 1]), tensor([3, 2]))

    The output nodes specified must be a superset of the nodes that have edges connecting
    to them.  For example, the following will raise an error since the output nodes
    do not contain node 3, which has an edge connecting to it.

    >>> g = dgl.graph(([1, 2], [2, 3]))
    >>> dgl.to_block(g, torch.LongTensor([2]))     # error

    Converting a heterogeneous graph to a block is similar, except that when specifying
    the output nodes, you have to give a dict:

    >>> g = dgl.heterograph({('A', '_E', 'B'): ([1, 2], [2, 3])})

    If you don't specify any node of type A on the output side, the node type ``A``
    in the block would have zero nodes on the output side.

    >>> block = dgl.to_block(g, {'B': torch.LongTensor([3, 2])})
    >>> block.number_of_dst_nodes('A')
    0
    >>> block.number_of_dst_nodes('B')
    2
    >>> block.dstnodes['B'].data[dgl.NID]
    tensor([3, 2])

    The input side would contain all the nodes on the output side:

    >>> block.srcnodes['B'].data[dgl.NID]
    tensor([3, 2])

    As well as all the nodes that have connections to the nodes on the output side:

    >>> block.srcnodes['A'].data[dgl.NID]
    tensor([2, 1])
    """
    assert g.device == F.cpu(), 'the graph must be on CPU'

    if dst_nodes is None:
        # Find all nodes that appeared as destinations
        dst_nodes = defaultdict(list)
        for etype in g.canonical_etypes:
            _, dst = g.edges(etype=etype)
            dst_nodes[etype[2]].append(dst)
        dst_nodes = {ntype: F.unique(F.cat(values, 0)) for ntype, values in dst_nodes.items()}
    elif not isinstance(dst_nodes, Mapping):
        # dst_nodes is a Tensor, check if the g has only one type.
        if len(g.ntypes) > 1:
            raise ValueError(
                'Graph has more than one node type; please specify a dict for dst_nodes.')
        dst_nodes = {g.ntypes[0]: dst_nodes}

    dst_node_ids = [
        utils.toindex(dst_nodes.get(ntype, []), g._idtype_str).tousertensor()
        for ntype in g.ntypes]
    dst_node_ids_nd = [F.to_dgl_nd(nodes) for nodes in dst_node_ids]

    new_graph_index, src_nodes_nd, induced_edges_nd = _CAPI_DGLToBlock(
        g._graph, dst_node_ids_nd, include_dst_in_src)

    # The new graph duplicates the original node types to SRC and DST sets.
    new_ntypes = (g.ntypes, g.ntypes)
    new_graph = DGLBlock(new_graph_index, new_ntypes, g.etypes)
    assert new_graph.is_unibipartite  # sanity check

    src_node_ids = [F.from_dgl_nd(src) for src in src_nodes_nd]
    edge_ids = [F.from_dgl_nd(eid) for eid in induced_edges_nd]

    node_frames = utils.extract_node_subframes_for_block(g, src_node_ids, dst_node_ids)
    edge_frames = utils.extract_edge_subframes(g, edge_ids)
    utils.set_new_frames(new_graph, node_frames=node_frames, edge_frames=edge_frames)

    return new_graph

def to_simple(g,
              return_counts='count',
              writeback_mapping=False,
              copy_ndata=True,
              copy_edata=False):
    r"""Convert a graph to a simple graph without parallel edges and return.

    For a heterogeneous graph with multiple edge types, DGL treats edges with the same
    edge type and endpoints as parallel edges and removes them.
    Optionally, one can get the number of parallel edges by specifying the
    :attr:`return_counts` argument. To get a mapping from the edge IDs in the
    input graph to the edge IDs in the resulting graph, set :attr:`writeback_mapping`
    to True.

    Parameters
    ----------
    g : DGLGraph
        The input graph.  Must be on CPU.
    return_counts : str, optional
        If given, the returned graph will store, under this edge feature name, the
        number of times each edge appears in the original graph.  The old features
        with the same name will be replaced.

        (Default: "count")
    writeback_mapping: bool, optional
        If True, return an extra write-back mapping for each edge
        type.  The write-back mapping is a tensor recording
        the mapping from the edge IDs in the input graph to
        the edge IDs in the result graph. If the graph is
        heterogeneous, DGL returns a dictionary of edge types and such
        tensors.

        If False, only the simple graph is returned.

        (Default: False)
    copy_ndata: bool, optional
        If True, the node features of the simple graph are copied
        from the original graph.

        If False, the simple graph will not have any node features.

        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the simple graph are copied
        from the original graph.  If there are duplicate edges between
        two nodes (u, v), the feature of the new edge is arbitrarily taken
        from one of the duplicate edges.

        If False, the simple graph will not have any edge features.

        (Default: False)

    Returns
    -------
    DGLGraph
        The simple graph.
    Tensor or dict of Tensor
        The write-back mapping.  Only returned when ``writeback_mapping`` is True.

    Notes
    -----
    If :attr:`copy_ndata` is True, the resulting graph will share the node feature
    tensors with the input graph. Hence, users should try to avoid in-place operations
    which will be visible to both graphs.

    Examples
    --------
    **Homogeneous Graphs**

    Create a graph for demonstrating the ``to_simple`` API.
    In the original graph, there are multiple edges between node 1 and node 2.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2, 1]), th.tensor([1, 2, 0, 2])))
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.], [6.]])

    Convert the graph to a simple graph.  The edge counts are
    stored in the edge feature 'cnt' and the write-back mapping
    is returned as a tensor.

    >>> sg, wm = dgl.to_simple(g, return_counts='cnt', writeback_mapping=True)
    >>> sg.ndata['h']
    tensor([[0.],
            [1.],
            [2.]])
    >>> u, v, eid = sg.edges(form='all')
    >>> u
    tensor([0, 1, 2])
    >>> v
    tensor([1, 2, 0])
    >>> eid
    tensor([0, 1, 2])
    >>> sg.edata['cnt']
    tensor([1, 2, 1])
    >>> wm
    tensor([0, 1, 2, 1])
    >>> 'h' in sg.edata
    False

    **Heterogeneous Graphs**

    >>> g = dgl.heterograph({
    ...     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    ...     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    ... })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The edge counts are stored in the default edge feature 'count' for each edge type.

    >>> sg, wm = dgl.to_simple(g, copy_ndata=False, writeback_mapping=True)
    >>> sg
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'wins', 'user'): 4, ('user', 'plays', 'game'): 3},
          metagraph=[('user', 'user'), ('user', 'game')])
    >>> sg.edges(etype='wins')
    (tensor([0, 2, 0, 2]), tensor([1, 1, 2, 0]))
    >>> wm[('user', 'wins', 'user')]
    tensor([0, 1, 2, 1, 3])
    >>> sg.edges(etype='plays')
    (tensor([1, 2, 1]), tensor([2, 1, 1]))
    >>> wm[('user', 'plays', 'game')]
    tensor([0, 1, 2])
    >>> 'hv' in sg.nodes['game'].data
    False
    >>> 'he' in sg.edges['plays'].data
    False
    >>> sg.edata['count']
    {('user', 'wins', 'user'): tensor([1, 2, 1, 1]),
     ('user', 'plays', 'game'): tensor([1, 1, 1])}
    """
    assert g.device == F.cpu(), 'the graph must be on CPU'
    if g.is_block:
        raise DGLError('Cannot convert a block graph to a simple graph.')
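    # The C API returns the coalesced graph index, plus (per edge type) the multiplicity
    # of each simple edge and a map from original edge IDs to simple-graph edge IDs.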
    simple_graph_index, counts, edge_maps = _CAPI_DGLToSimpleHetero(g._graph)
    simple_graph = DGLHeteroGraph(simple_graph_index, g.ntypes, g.etypes)
    counts = [F.from_dgl_nd(count) for count in counts]
    edge_maps = [F.from_dgl_nd(edge_map) for edge_map in edge_maps]

    if copy_ndata:
        node_frames = utils.extract_node_subframes(g, None)
        utils.set_new_frames(simple_graph, node_frames=node_frames)
    if copy_edata:
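        # For each edge in the simple graph, pick one representative original edge ID
        # (the first occurrence found via np.unique) and copy its features.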
        eids = []
        for i in range(len(g.canonical_etypes)):
            feat_idx = F.asnumpy(edge_maps[i])
            _, indices = np.unique(feat_idx, return_index=True)
            eids.append(F.zerocopy_from_numpy(indices))

        edge_frames = utils.extract_edge_subframes(g, eids)
        utils.set_new_frames(simple_graph, edge_frames=edge_frames)

    if return_counts is not None:
        for count, canonical_etype in zip(counts, g.canonical_etypes):
            simple_graph.edges[canonical_etype].data[return_counts] = count

    if writeback_mapping:
        # single edge type
        if len(edge_maps) == 1:
            return simple_graph, edge_maps[0]
        # multiple edge type
        else:
            wb_map = {}
            for edge_map, canonical_etype in zip(edge_maps, g.canonical_etypes):
                wb_map[canonical_etype] = edge_map
            return simple_graph, wb_map

    return simple_graph

DGLHeteroGraph.to_simple = utils.alias_func(to_simple)

def as_heterograph(g, ntype='_U', etype='_E'):  # pylint: disable=unused-argument
    """Convert a DGLGraph to a DGLHeteroGraph with one node and edge type.

    DEPRECATED: DGLGraph and DGLHeteroGraph have been merged. This function will
                do nothing and can be removed safely in all cases.
    """
    dgl_warning('DEPRECATED: DGLGraph and DGLHeteroGraph have been merged in v0.5.\n'
                '\tdgl.as_heterograph will do nothing and can be removed safely in all cases.')
    return g

def as_immutable_graph(hg):
    """Convert a DGLHeteroGraph with one node and edge type into a DGLGraph.

    DEPRECATED: DGLGraph and DGLHeteroGraph have been merged. This function will
                do nothing and can be removed safely in all cases.
    """
    dgl_warning('DEPRECATED: DGLGraph and DGLHeteroGraph have been merged in v0.5.\n'
                '\tdgl.as_immutable_graph will do nothing and can be removed safely in all cases.')
    return hg

_init_api("dgl.transform")