transform.py 63.5 KB
Newer Older
1
2
"""Module for graph transformation utilities."""

3
from collections.abc import Iterable, Mapping
4
from collections import defaultdict
5
6
import numpy as np
from scipy import sparse
7
from ._ffi.function import _init_api
8
from .graph import DGLGraph
9
10
from .heterograph import DGLHeteroGraph
from . import ndarray as nd
11
from . import backend as F
12
from .graph_index import from_coo
13
from .graph_index import _get_halo_subgraph_inner_node
14
from .graph import unbatch
15
from .convert import graph, bipartite, heterograph
16
from . import utils
17
18
from .base import EID, NID
from . import ndarray as nd
19

20

21
22
__all__ = [
    'line_graph',
23
    'line_heterograph',
24
25
26
    'khop_adj',
    'khop_graph',
    'reverse',
27
    'reverse_heterograph',
28
29
    'to_simple_graph',
    'to_bidirected',
30
    'to_bidirected_stale',
31
32
33
34
35
36
37
    'laplacian_lambda_max',
    'knn_graph',
    'segmented_knn_graph',
    'add_self_loop',
    'remove_self_loop',
    'metapath_reachable_graph',
    'compact_graphs',
38
    'to_block',
39
40
    'to_simple',
    'in_subgraph',
41
    'out_subgraph',
42
43
44
    'remove_edges',
    'as_immutable_graph',
    'as_heterograph']
45
46


47
48
49
50
51
52
53
54
55
56
def pairwise_squared_distance(x):
    """
    x : (n_samples, n_points, dims)
    return : (n_samples, n_points, n_points)
    """
    x2s = F.sum(x * x, -1, True)
    # assuming that __matmul__ is always implemented (true for PyTorch, MXNet and Chainer)
    return x2s + F.swapaxes(x2s, -1, -2) - 2 * x @ F.swapaxes(x, -1, -2)

#pylint: disable=invalid-name
57
def knn_graph(x, k):
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix. The predecessors of each point are its k-nearest
    neighbors.

    If a 3D tensor is given instead, then each row would be transformed into
    a separate graph.  The graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.

        If 2D, each row of ``x`` corresponds to a node.

        If 3D, a k-NN graph would be constructed for each row.  Then
        the graphs are unioned.
    k : int
        The number of neighbors

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    if F.ndim(x) == 2:
        x = F.unsqueeze(x, 0)
    n_samples, n_points, _ = F.shape(x)

    dist = pairwise_squared_distance(x)
    k_indices = F.argtopk(dist, k, 2, descending=False)
    dst = F.copy_to(k_indices, F.cpu())

    src = F.zeros_like(dst) + F.reshape(F.arange(0, n_points), (1, -1, 1))

    per_sample_offset = F.reshape(F.arange(0, n_samples) * n_points, (-1, 1, 1))
    dst += per_sample_offset
    src += per_sample_offset
    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))
97
98
    adj = sparse.csr_matrix(
        (F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))),
Quan (Andy) Gan's avatar
Quan (Andy) Gan committed
99
        shape=(n_samples * n_points, n_samples * n_points))
100
101
102
103
104

    g = DGLGraph(adj, readonly=True)
    return g

#pylint: disable=invalid-name
105
def segmented_knn_graph(x, k, segs):
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
    """Transforms the given point set to a directed graph, whose coordinates
    are given as a matrix.  The predecessors of each point are its k-nearest
    neighbors.

    The matrices are concatenated along the first axis, and are segmented by
    ``segs``.  Each block would be transformed into a separate graph.  The
    graphs will be unioned.

    Parameters
    ----------
    x : Tensor
        The input tensor.
    k : int
        The number of neighbors
    segs : iterable of int
        Number of points of each point set.
        Must sum up to the number of rows in ``x``.

    Returns
    -------
    DGLGraph
        The graph.  The node IDs are in the same order as ``x``.
    """
    n_total_points, _ = F.shape(x)
    offset = np.insert(np.cumsum(segs), 0, 0)

    h_list = F.split(x, segs, 0)
    dst = [
        F.argtopk(pairwise_squared_distance(h_g), k, 1, descending=False) +
Quan (Andy) Gan's avatar
Quan (Andy) Gan committed
135
        int(offset[i])
136
137
138
139
140
141
142
143
144
145
146
        for i, h_g in enumerate(h_list)]
    dst = F.cat(dst, 0)
    src = F.arange(0, n_total_points).unsqueeze(1).expand(n_total_points, k)

    dst = F.reshape(dst, (-1,))
    src = F.reshape(src, (-1,))
    adj = sparse.csr_matrix((F.asnumpy(F.zeros_like(dst) + 1), (F.asnumpy(dst), F.asnumpy(src))))

    g = DGLGraph(adj, readonly=True)
    return g

147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
def to_bidirected(g, readonly=None, copy_ndata=True,
                  copy_edata=False, ignore_bipartite=False):
    r"""Convert the graph to a bidirected one.

    For a graph with edges :math:`(i_1, j_1), \cdots, (i_n, j_n)`, this
    function creates a new graph with edges
    :math:`(i_1, j_1), \cdots, (i_n, j_n), (j_1, i_1), \cdots, (j_n, i_n)`.

    For a heterograph with multiple edge types, we can treat edges corresponding
    to each type as a separate graph and convert the graph to a bidirected one
    for each of them.

    Since **to_bidirected is not well defined for unidirectional bipartite graphs**,
    an error will be raised if an edge type of the input heterograph is for a
    unidirectional bipartite graph. We can simply skip the edge types corresponding
    to unidirectional bipartite graphs by specifying ``ignore_bipartite=True``.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    readonly : bool, default to be True
        Deprecated. There will be no difference between readonly and non-readonly
    copy_ndata: bool, optional
        If True, the node features of the bidirected graph are copied from
        the original graph. If False, the bidirected
        graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the features of the reversed edges will be identical to
        the original ones."
        If False, the bidirected graph will not have any edge
        features.
        (Default: False)
    ignore_bipartite: bool, optional
        If True, unidirectional bipartite graphs are ignored and
        no error is raised. If False, an error  will be raised if
        an edge type of the input heterograph is for a unidirectional
        bipartite graph.

    Returns
    -------
    DGLGraph
        The bidirected graph

    Notes
    -----
    If ``copy_ndata`` is ``True``, same tensors are used as
    the node features of the original graph and the new graph.
    As a result, users should avoid performing in-place operations
    on the node features of the new graph to avoid feature corruption.
    On the contrary, edge features are concatenated,
    and they are not shared due to concatenation.
    For concrete examples, refer to the ``Examples`` section below.


    Examples
    --------
    **Homographs**

    >>> g = dgl.graph(th.tensor([0, 0]), th.tensor([0, 1]))
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges()
    (tensor([0, 0, 0, 1]), tensor([0, 1, 0, 0]))

    To remove duplicate edges, see :func:to_simple

    **Heterographs with Multiple Edge Types**

    g = dgl.heterograph({
    >>>     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    >>>     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1])),
    >>>     ('user', 'follows', 'user'): (th.tensor([1, 2, 1), th.tensor([0, 0, 0]))
    >>> })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['wins'].data['h'] = th.tensor([0, 1, 2, 3, 4])

    The to_bidirected operation is applied to the subgraph
    corresponding to ('user', 'wins', 'user') and the
    subgraph corresponding to ('user', 'follows', 'user).
    The unidirectional bipartite subgraph ('user', 'plays', 'game')
    is ignored. Both the node features and edge features
    are shared.

    >>> bg = dgl.to_bidirected(g, copy_ndata=True,
                               copy_edata=True, ignore_bipartite=True)
    >>> bg.edges(('user', 'wins', 'user'))
    (tensor([0, 2, 0, 2, 2, 1, 1, 2, 1, 0]), tensor([1, 1, 2, 1, 0, 0, 2, 0, 2, 2]))
    >>> bg.edges(('user', 'follows', 'user'))
    (tensor([1, 2, 1, 0, 0, 0]), tensor([0, 0, 0, 1, 2, 1]))
    >>> bg.edges(('user', 'plays', 'game'))
    (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    >>> bg.nodes['game'].data['hv']
    tensor([0, 0, 0])
    >>> bg.edges[('user', 'wins', 'user')].data['h']
    th.tensor([0, 1, 2, 3, 4, 0, 1, 2, 3, 4])
    """
    if readonly is not None:
        dgl_warning("Parameter readonly is deprecated" \
            "There will be no difference between readonly and non-readonly DGLGraph")

    canonical_etypes = g.canonical_etypes
    # fast path
    if ignore_bipartite is False:
        subgs = {}
        for c_etype in canonical_etypes:
            if c_etype[0] != c_etype[2]:
                assert False, "to_bidirected is not well defined for " \
                    "unidirectional bipartite graphs" \
                    ", but {} is unidirectional bipartite".format(c_etype)

            u, v = g.edges(form='uv', order='eid', etype=c_etype)
            subgs[c_etype] = (F.cat([u, v], dim=0), F.cat([v, u], dim=0))

        new_g = heterograph(subgs)
    else:
        subgs = {}
        for c_etype in canonical_etypes:
            if c_etype[0] != c_etype[2]:
                u, v = g.edges(form='uv', order='eid', etype=c_etype)
                subgs[c_etype] = (u, v)
            else:
                u, v = g.edges(form='uv', order='eid', etype=c_etype)
                subgs[c_etype] = (F.cat([u, v], dim=0), F.cat([v, u], dim=0))

        new_g = heterograph(subgs)

    # handle features
    if copy_ndata:
        # for each ntype
        for ntype in g.ntypes:
            # for each data field
            for k in g.nodes[ntype].data:
                new_g.nodes[ntype].data[k] = g.nodes[ntype].data[k]

    if copy_edata:
        # for each etype
        for c_etype in canonical_etypes:
            if c_etype[0] != c_etype[2]:
                # for each data field
                for k in g.edges[c_etype].data:
                    new_g.edges[c_etype].data[k] = g.edges[c_etype].data[k]
            else:
                for k in g.edges[c_etype].data:
                    new_g.edges[c_etype].data[k] = \
                        F.cat([g.edges[c_etype].data[k], g.edges[c_etype].data[k]], dim=0)
    return new_g

295
296
297
298
299
300
def line_graph(g, backtracking=True, shared=False):
    """Return the line graph of this graph.

    Parameters
    ----------
    g : dgl.DGLGraph
301
        The input graph.
302
303
304
305
306
307
308
309
310
311
312
313
314
315
    backtracking : bool, optional
        Whether the returned line graph is backtracking.
    shared : bool, optional
        Whether the returned line graph shares representations with `self`.

    Returns
    -------
    DGLGraph
        The line graph of this graph.
    """
    graph_data = g._graph.line_graph(backtracking)
    node_frame = g._edge_frame if shared else None
    return DGLGraph(graph_data, node_frame)

316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
def line_heterograph(g, backtracking=True):
    """Return the line graph of this graph.

    The graph should be an directed homogeneous graph. Aother type of graphs
    are not supported right now.

    All node features and edge features are not copied to the output

    Parameters
    ----------
    backtracking : bool
        Whether the pair of (v, u) (u, v) edges are treated as linked. Default True.

    Returns
    -------
    G : DGLHeteroGraph
        The line graph of this graph.

    Examples:
    A = [[0, 0, 1],
            [1, 0, 1],
            [1, 1, 0]]
    >>> g = dgl.graph(([0, 1, 1, 2, 2],[2, 0, 2, 0, 1]), 'user', 'follows')
    >>> lg = g.line_graph()
    >>> lg
    ... Graph(num_nodes=5, num_edges=8,
    ... ndata_schemes={}
    ... edata_schemes={})
    >>> lg.edges()
    ... (tensor([0, 0, 1, 2, 2, 3, 4, 4]), tensor([3, 4, 0, 3, 4, 0, 1, 2]))
    >>>
    >>> lg = g.line_graph(backtracking=False)
    >>> lg
    ... Graph(num_nodes=5, num_edges=4,
    ... ndata_schemes={}
    ... edata_schemes={})
    >>> lg.edges()
    ... (tensor([0, 1, 2, 4]), tensor([4, 0, 3, 1]))

    """
    assert g.is_homograph(), \
        'line_heterograph only support directed homogeneous graph right now'

    hgidx = _CAPI_DGLHeteroLineGraph(g._graph, backtracking)
    hg = DGLHeteroGraph(hgidx, g._etypes, g._ntypes)
    return hg

363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
def khop_adj(g, k):
    """Return the matrix of :math:`A^k` where :math:`A` is the adjacency matrix of :math:`g`,
    where a row represents the destination and a column represents the source.

    Parameters
    ----------
    g : dgl.DGLGraph
        The input graph.
    k : int
        The :math:`k` in :math:`A^k`.

    Returns
    -------
    tensor
        The returned tensor, dtype is ``np.float32``.

    Examples
    --------

    >>> import dgl
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0])
    >>> dgl.khop_adj(g, 1)
    tensor([[1., 0., 0., 0., 1.],
            [1., 1., 0., 0., 0.],
            [0., 1., 1., 0., 0.],
            [0., 0., 1., 1., 0.],
            [0., 0., 0., 1., 1.]])
    >>> dgl.khop_adj(g, 3)
    tensor([[1., 0., 1., 3., 3.],
            [3., 1., 0., 1., 3.],
            [3., 3., 1., 0., 1.],
            [1., 3., 3., 1., 0.],
            [0., 1., 3., 3., 1.]])
    """
    adj_k = g.adjacency_matrix_scipy(return_edge_ids=False) ** k
400
    return F.tensor(adj_k.todense().astype(np.float32))
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421

def khop_graph(g, k):
    """Return the graph that includes all :math:`k`-hop neighbors of the given graph as edges.
    The adjacency matrix of the returned graph is :math:`A^k`
    (where :math:`A` is the adjacency matrix of :math:`g`).

    Parameters
    ----------
    g : dgl.DGLGraph
        The input graph.
    k : int
        The :math:`k` in `k`-hop graph.

    Returns
    -------
    dgl.DGLGraph
        The returned ``DGLGraph``.

    Examples
    --------

Mufei Li's avatar
Mufei Li committed
422
423
424
425
426
427
428
429
430
431
432
433
    Below gives an easy example:

    >>> import dgl
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(3)
    >>> g.add_edges([0, 1], [1, 2])
    >>> g_2 = dgl.transform.khop_graph(g, 2)
    >>> print(g_2.edges())
    (tensor([0]), tensor([2]))

    A more complicated example:

434
435
436
437
438
439
440
441
442
443
444
445
446
447
    >>> import dgl
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0,1,2,3,4,0,1,2,3,4], [0,1,2,3,4,1,2,3,4,0])
    >>> dgl.khop_graph(g, 1)
    DGLGraph(num_nodes=5, num_edges=10,
             ndata_schemes={}
             edata_schemes={})
    >>> dgl.khop_graph(g, 3)
    DGLGraph(num_nodes=5, num_edges=40,
             ndata_schemes={}
             edata_schemes={})
    """
    n = g.number_of_nodes()
Mufei Li's avatar
Mufei Li committed
448
    adj_k = g.adjacency_matrix_scipy(transpose=True, return_edge_ids=False) ** k
449
450
451
452
453
454
    adj_k = adj_k.tocoo()
    multiplicity = adj_k.data
    row = np.repeat(adj_k.row, multiplicity)
    col = np.repeat(adj_k.col, multiplicity)
    # TODO(zihao): we should support creating multi-graph from scipy sparse matrix
    # in the future.
455
    return DGLGraph(from_coo(n, row, col, True))
456

457
def reverse(g, copy_ndata=False, copy_edata=False):
458
459
460
    """Return the reverse of a graph
    The reverse (also called converse, transpose) of a directed graph is another directed
    graph on the same nodes with edges reversed in terms of direction.
461
    Given a :class:`dgl.DGLGraph` object, we return another :class:`dgl.DGLGraph` object
462
463
464
465
466
    representing its reverse.

    Parameters
    ----------
    g : dgl.DGLGraph
467
        The input graph.
468
469
    copy_ndata: bool, optional
        If True, node attributes are copied from the original graph to the reversed graph.
470
        Otherwise the reversed graph will not be initialized with node attributes.
471
472
    copy_edata: bool, optional
        If True, edge attributes are copied from the original graph to the reversed graph.
473
474
        Otherwise the reversed graph will not have edge attributes.

475
476
477
478
479
480
481
482
483
484
485
486
    Return
    ------
    dgl.DGLGraph
        The reversed graph.

    Notes
    -----
    * We do not dynamically update the topology of a graph once that of its reverse changes.
      This can be particularly problematic when the node/edge attrs are shared. For example,
      if the topology of both the original graph and its reverse get changed independently,
      you can get a mismatched node/edge feature.

487
488
489
490
491
492
493
494
495
496
497
    Examples
    --------
    Create a graph to reverse.
    >>> import dgl
    >>> import torch as th
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(3)
    >>> g.add_edges([0, 1, 2], [1, 2, 0])
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.]])
    Reverse the graph and examine its structure.
498
    >>> rg = g.reverse(copy_ndata=True, copy_edata=True)
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
    >>> print(rg)
    DGLGraph with 3 nodes and 3 edges.
    Node data: {'h': Scheme(shape=(1,), dtype=torch.float32)}
    Edge data: {'h': Scheme(shape=(1,), dtype=torch.float32)}
    The edges are reversed now.
    >>> rg.has_edges_between([1, 2, 0], [0, 1, 2])
    tensor([1, 1, 1])
    Reversed edges have the same feature as the original ones.
    >>> g.edges[[0, 2], [1, 0]].data['h'] == rg.edges[[1, 0], [0, 2]].data['h']
    tensor([[1],
            [1]], dtype=torch.uint8)
    The node/edge features of the reversed graph share memory with the original
    graph, which is helpful for both forward computation and back propagation.
    >>> g.ndata['h'] = g.ndata['h'] + 1
    >>> rg.ndata['h']
    tensor([[1.],
            [2.],
            [3.]])
    """
518
    g_reversed = DGLGraph()
519
    g_reversed.add_nodes(g.number_of_nodes())
Mufei Li's avatar
Mufei Li committed
520
    g_edges = g.all_edges(order='eid')
521
    g_reversed.add_edges(g_edges[1], g_edges[0])
522
523
    g_reversed._batch_num_nodes = g._batch_num_nodes
    g_reversed._batch_num_edges = g._batch_num_edges
524
    if copy_ndata:
525
        g_reversed._node_frame = g._node_frame
526
    if copy_edata:
527
528
        g_reversed._edge_frame = g._edge_frame
    return g_reversed
529

530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
def reverse_heterograph(g, copy_ndata=True, copy_edata=False):
    r"""Return the reverse of a graph.

    The reverse (also called converse, transpose) of a graph with edges
    :math:`(i_1, j_1), (i_2, j_2), \cdots` is a new graph with edges
    :math:`(j_1, i_1), (j_2, i_2), \cdots`.

    For a heterograph with multiple edge types, we can treat edges corresponding
    to each type as a separate graph and compute the reverse for each of them.
    If the original edge type is (A, B, C), its reverse will have edge type (C, B, A).

    Given a :class:`dgl.DGLGraph` object, we return another :class:`dgl.DGLGraph`
    object representing its reverse.

    Parameters
    ----------
    g : dgl.DGLGraph
        The input graph.
    copy_ndata: bool, optional
        If True, the node features of the reversed graph are copied from the
        original graph. If False, the reversed graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the reversed graph are copied from the
        original graph. If False, the reversed graph will not have any edge features.
        (Default: False)

    Return
    ------
    dgl.DGLGraph
        The reversed graph.

    Notes
    -----
    If ``copy_ndata`` or ``copy_edata`` is ``True``, same tensors will be used for
    the features of the original graph and the reversed graph to save memory cost.
    As a result, users
    should avoid performing in-place operations on the features of the reversed
    graph, which will corrupt the features of the original graph as well. For
    concrete examples, refer to the ``Examples`` section below.

    Examples
    --------
    **Homographs or Heterographs with A Single Edge Type**

    Create a graph to reverse.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2]), th.tensor([1, 2, 0])))
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.]])

    Reverse the graph.

    >>> rg = dgl.reverse(g, copy_edata=True)
    >>> rg.ndata['h']
    tensor([[0.],
            [1.],
            [2.]])

    The i-th edge in the reversed graph corresponds to the i-th edge in the
    original graph. When ``copy_edata`` is ``True``, they have the same features.

    >>> rg.edges()
    (tensor([1, 2, 0]), tensor([0, 1, 2]))
    >>> rg.edata['h']
    tensor([[3.],
            [4.],
            [5.]])

    **In-place operations on features of one graph will be reflected on features of
    its reverse, which is dangerous. Out-place operations will not be reflected.**

    >>> rg.ndata['h'] += 1
    >>> g.ndata['h']
    tensor([[1.],
            [2.],
            [3.]])
    >>> g.ndata['h'] += 1
    >>> rg.ndata['h']
    tensor([[2.],
            [3.],
            [4.]])
    >>> rg.ndata['h2'] = th.ones(3, 1)
    >>> 'h2' in g.ndata
    False

    **Heterographs with Multiple Edge Types**

    >>> g = dgl.heterograph({
    >>>     ('user', 'follows', 'user'): (th.tensor([0, 2]), th.tensor([1, 2])),
    >>>     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    >>> })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)

    The reverse of the graph above can be obtained by combining the reverse of the
    subgraph corresponding to ('user', 'follows', 'user') and the subgraph corresponding
    to ('user', 'plays', 'game'). The reverse for a graph with relation (h, r, t) will
    have relation (t, r, h).

    >>> rg = dgl.reverse(g, copy_ndata=True)
    >>> rg
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'follows', 'user'): 2, ('game', 'plays', 'user'): 3},
          metagraph=[('user', 'user'), ('game', 'user')])
    >>> rg.edges(etype='follows')
    (tensor([1, 2]), tensor([0, 2]))
    >>> rg.edges(etype='plays')
    (tensor([2, 1, 1]), tensor([1, 2, 1]))
    >>> rg.nodes['game'].data['hv]
    tensor([[1.],
            [1.],
            [1.]])
    >>> rg.edges['plays'].data
    {}
    """
    # TODO(0.5 release, xiangsx) need to handle BLOCK
    # currently reversing a block results in undefined behavior
650
651
    gidx = g._graph.reverse()
    new_g = DGLHeteroGraph(gidx, g.ntypes, g.etypes)
652
653
654
655
656
657
658
659
660
661
662
663

    # handle ndata
    if copy_ndata:
        # for each ntype
        for ntype in g.ntypes:
            # for each data field
            for k in g.nodes[ntype].data:
                new_g.nodes[ntype].data[k] = g.nodes[ntype].data[k]

    # handle edata
    if copy_edata:
        # for each etype
664
        for etype in g.etypes:
665
666
667
668
669
670
671
672
            # for each data field
            for k in g.edges[etype].data:
                new_g.edges[etype].data[k] = g.edges[etype].data[k]

    return new_g

DGLHeteroGraph.reverse = reverse_heterograph

673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
def to_simple_graph(g):
    """Convert the graph to a simple graph with no multi-edge.

    The function generates a new *readonly* graph with no node/edge feature.

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    DGLGraph
        A simple graph.
    """
688
689
    gidx = _CAPI_DGLToSimpleGraph(g._graph)
    return DGLGraph(gidx, readonly=True)
690

691
def to_bidirected_stale(g, readonly=True):
692
693
694
    """Convert the graph to a bidirected graph.

    The function generates a new graph with no node/edge feature.
695
696
697
698
699
    If g has an edge for i->j but no edge for j->i, then the
    returned graph will have both i->j and j->i.

    If the input graph is a multigraph (there are multiple edges from node i to node j),
    the returned graph isn't well defined.
700
701
702
703
704
705
706
707

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    readonly : bool, default to be True
        Whether the returned bidirected graph is readonly or not.

708
709
710
711
    Notes
    -----
    Please make sure g is a single graph, otherwise the return value is undefined.

712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
    Returns
    -------
    DGLGraph

    Examples
    --------
    The following two examples use PyTorch backend, one for non-multi graph
    and one for multi-graph.

    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(2)
    >>> g.add_edges([0, 0], [0, 1])
    >>> bg1 = dgl.to_bidirected(g)
    >>> bg1.edges()
    (tensor([0, 1, 0]), tensor([0, 0, 1]))
    """
    if readonly:
729
        newgidx = _CAPI_DGLToBidirectedImmutableGraph(g._graph)
730
    else:
731
        newgidx = _CAPI_DGLToBidirectedMutableGraph(g._graph)
732
733
    return DGLGraph(newgidx)

734
735
736
737
738
739
740
741
def laplacian_lambda_max(g):
    """Return the largest eigenvalue of the normalized symmetric laplacian of g.

    The eigenvalue of the normalized symmetric of any graph is less than or equal to 2,
    ref: https://en.wikipedia.org/wiki/Laplacian_matrix#Properties

    Parameters
    ----------
742
    g : DGLGraph
743
744
745
746
747
        The input graph, it should be an undirected graph.

    Returns
    -------
    list :
748
749
        Return a list, where the i-th item indicates the largest eigenvalue
        of i-th graph in g.
750
751
752
753
754
755
756
757
758
759
760

    Examples
    --------

    >>> import dgl
    >>> g = dgl.DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0, 1, 2, 3, 4, 0, 1, 2, 3, 4], [1, 2, 3, 4, 0, 4, 0, 1, 2, 3])
    >>> dgl.laplacian_lambda_max(g)
    [1.809016994374948]
    """
761
    g_arr = unbatch(g)
762
763
764
765
    rst = []
    for g_i in g_arr:
        n = g_i.number_of_nodes()
        adj = g_i.adjacency_matrix_scipy(return_edge_ids=False).astype(float)
766
        norm = sparse.diags(F.asnumpy(g_i.in_degrees()).clip(1) ** -0.5, dtype=float)
767
768
769
770
771
        laplacian = sparse.eye(n) - norm * adj * norm
        rst.append(sparse.linalg.eigs(laplacian, 1, which='LM',
                                      return_eigenvectors=False)[0].real)
    return rst

Mufei Li's avatar
Mufei Li committed
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
def metapath_reachable_graph(g, metapath):
    """Return a graph where the successors of any node ``u`` are nodes reachable from ``u`` by
    the given metapath.

    If the beginning node type ``s`` and ending node type ``t`` are the same, it will return
    a homogeneous graph with node type ``s = t``.  Otherwise, a unidirectional bipartite graph
    with source node type ``s`` and destination node type ``t`` is returned.

    In both cases, two nodes ``u`` and ``v`` will be connected with an edge ``(u, v)`` if
    there exists one path matching the metapath from ``u`` to ``v``.

    The result graph keeps the node set of type ``s`` and ``t`` in the original graph even if
    they might have no neighbor.

    The features of the source/destination node type in the original graph would be copied to
    the new graph.

    Parameters
    ----------
    g : DGLHeteroGraph
        The input graph
    metapath : list[str or tuple of str]
        Metapath in the form of a list of edge types

    Returns
    -------
    DGLHeteroGraph
        A homogeneous or bipartite graph.
    """
    adj = 1
802
    index_dtype = g._idtype_str
Mufei Li's avatar
Mufei Li committed
803
804
805
806
807
808
809
810
    for etype in metapath:
        adj = adj * g.adj(etype=etype, scipy_fmt='csr', transpose=True)

    adj = (adj != 0).tocsr()
    srctype = g.to_canonical_etype(metapath[0])[0]
    dsttype = g.to_canonical_etype(metapath[-1])[2]
    if srctype == dsttype:
        assert adj.shape[0] == adj.shape[1]
811
        new_g = graph(adj, ntype=srctype, index_dtype=index_dtype)
Mufei Li's avatar
Mufei Li committed
812
    else:
813
        new_g = bipartite(adj, utype=srctype, vtype=dsttype, index_dtype=index_dtype)
Mufei Li's avatar
Mufei Li committed
814
815
816
817
818
819
820
821

    for key, value in g.nodes[srctype].data.items():
        new_g.nodes[srctype].data[key] = value
    if srctype != dsttype:
        for key, value in g.nodes[dsttype].data.items():
            new_g.nodes[dsttype].data[key] = value

    return new_g
822

VoVAllen's avatar
VoVAllen committed
823
824
825
826
def add_self_loop(g):
    """Return a new graph containing all the edges in the input graph plus self loops
    of every nodes.
    No duplicate self loop will be added for nodes already having self loops.
827
828
829
830
831
832
833
834
    Self-loop edges id are not preserved. All self-loop edges would be added at the end.

    Examples
    ---------

    >>> g = DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0, 1, 2], [1, 1, 2])
VoVAllen's avatar
VoVAllen committed
835
    >>> new_g = dgl.transform.add_self_loop(g) # Nodes 0, 3, 4 don't have self-loop
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
    >>> new_g.edges()
    (tensor([0, 0, 1, 2, 3, 4]), tensor([1, 0, 1, 2, 3, 4]))

    Parameters
    ------------
    g: DGLGraph

    Returns
    --------
    DGLGraph
    """
    new_g = DGLGraph()
    new_g.add_nodes(g.number_of_nodes())
    src, dst = g.all_edges(order="eid")
    src = F.zerocopy_to_numpy(src)
    dst = F.zerocopy_to_numpy(dst)
    non_self_edges_idx = src != dst
    nodes = np.arange(g.number_of_nodes())
    new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx])
    new_g.add_edges(nodes, nodes)
    return new_g

def remove_self_loop(g):
    """Return a new graph with all self-loop edges removed

    Examples
    ---------

    >>> g = DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0, 1, 2], [1, 1, 2])
    >>> new_g = dgl.transform.remove_self_loop(g) # Nodes 1, 2 have self-loop
    >>> new_g.edges()
    (tensor([0]), tensor([1]))

    Parameters
    ------------
    g: DGLGraph

    Returns
    --------
    DGLGraph
    """
    new_g = DGLGraph()
    new_g.add_nodes(g.number_of_nodes())
    src, dst = g.all_edges(order="eid")
    src = F.zerocopy_to_numpy(src)
    dst = F.zerocopy_to_numpy(dst)
    non_self_edges_idx = src != dst
    new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx])
    return new_g

Da Zheng's avatar
Da Zheng committed
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
def reorder_nodes(g, new_node_ids):
    """ Generate a new graph with new node Ids.

    We assign each node in the input graph with a new node Id. This results in
    a new graph.

    Parameters
    ----------
    g : DGLGraph
        The input graph
    new_node_ids : a tensor
        The new node Ids
    Returns
    -------
    DGLGraph
        The graph with new node Ids.
    """
    assert len(new_node_ids) == g.number_of_nodes(), \
            "The number of new node ids must match #nodes in the graph."
    new_node_ids = utils.toindex(new_node_ids)
    sorted_ids, idx = F.sort_1d(new_node_ids.tousertensor())
    assert F.asnumpy(sorted_ids[0]) == 0 \
            and F.asnumpy(sorted_ids[-1]) == g.number_of_nodes() - 1, \
            "The new node Ids are incorrect."
    new_gidx = _CAPI_DGLReorderGraph(g._graph, new_node_ids.todgltensor())
    new_g = DGLGraph(new_gidx)
    new_g.ndata['orig_id'] = idx
    return new_g

def partition_graph_with_halo(g, node_part, extra_cached_hops, reshuffle=False):
    '''Partition a graph.

    Based on the given node assignments for each partition, the function splits
    the input graph into subgraphs. A subgraph may contain HALO nodes which does
    not belong to the partition of a subgraph but are connected to the nodes
    in the partition within a fixed number of hops.

    If `reshuffle` is turned on, the function reshuffles node Ids and edge Ids
    of the input graph before partitioning. After reshuffling, all nodes and edges
    in a partition fall in a contiguous Id range in the input graph.
    The partitioend subgraphs have node data 'orig_id', which stores the node Ids
    in the original input graph.
930
931
932
933
934
935
936
937
938

    Parameters
    ------------
    g: DGLGraph
        The graph to be partitioned
    node_part: 1D tensor
        Specify which partition a node is assigned to. The length of this tensor
        needs to be the same as the number of nodes of the graph. Each element
        indicates the partition Id of a node.
Da Zheng's avatar
Da Zheng committed
939
    extra_cached_hops: int
940
        The number of hops a HALO node can be accessed.
Da Zheng's avatar
Da Zheng committed
941
942
943
    reshuffle : bool
        Resuffle nodes so that nodes in the same partition are in the same Id range.

944
945
946
947
948
949
950
    Returns
    --------
    a dict of DGLGraphs
        The key is the partition Id and the value is the DGLGraph of the partition.
    '''
    assert len(node_part) == g.number_of_nodes()
    node_part = utils.toindex(node_part)
Da Zheng's avatar
Da Zheng committed
951
952
953
    if reshuffle:
        node_part = node_part.tousertensor()
        sorted_part, new2old_map = F.sort_1d(node_part)
954
955
        new_node_ids = np.zeros((g.number_of_nodes(),), dtype=np.int64)
        new_node_ids[F.asnumpy(new2old_map)] = np.arange(0, g.number_of_nodes())
Da Zheng's avatar
Da Zheng committed
956
957
958
959
960
961
962
963
964
        g = reorder_nodes(g, new_node_ids)
        node_part = utils.toindex(sorted_part)
        # We reassign edges in in-CSR. In this way, after partitioning, we can ensure
        # that all edges in a partition are in the contiguous Id space.
        orig_eids = _CAPI_DGLReassignEdges(g._graph, True)
        orig_eids = utils.toindex(orig_eids)
        g.edata['orig_id'] = orig_eids.tousertensor()

    subgs = _CAPI_DGLPartitionWithHalo(g._graph, node_part.todgltensor(), extra_cached_hops)
965
    subg_dict = {}
Da Zheng's avatar
Da Zheng committed
966
    node_part = node_part.tousertensor()
967
968
    for i, subg in enumerate(subgs):
        inner_node = _get_halo_subgraph_inner_node(subg)
969
        subg = g._create_subgraph(subg, subg.induced_nodes, subg.induced_edges)
970
971
        inner_node = F.zerocopy_from_dlpack(inner_node.to_dlpack())
        subg.ndata['inner_node'] = inner_node
Da Zheng's avatar
Da Zheng committed
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
        subg.ndata['part_id'] = F.gather_row(node_part, subg.parent_nid)
        if reshuffle:
            subg.ndata['orig_id'] = F.gather_row(g.ndata['orig_id'], subg.ndata[NID])
            subg.edata['orig_id'] = F.gather_row(g.edata['orig_id'], subg.edata[EID])

        if extra_cached_hops >= 1:
            inner_edge = F.zeros((subg.number_of_edges(),), F.int64, F.cpu())
            inner_nids = F.nonzero_1d(subg.ndata['inner_node'])
            # TODO(zhengda) we need to fix utils.toindex() to avoid the dtype cast below.
            inner_nids = F.astype(inner_nids, F.int64)
            inner_eids = subg.in_edges(inner_nids, form='eid')
            inner_edge = F.scatter_row(inner_edge, inner_eids,
                                       F.ones((len(inner_eids),), F.dtype(inner_edge), F.cpu()))
        else:
            inner_edge = F.ones((subg.number_of_edges(),), F.int64, F.cpu())
987
988
989
990
        subg.edata['inner_edge'] = inner_edge
        subg_dict[i] = subg
    return subg_dict

991
def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False):
992
993
    ''' This assigns nodes to different partitions with Metis partitioning algorithm.

994
995
996
997
998
999
1000
1001
1002
1003
    When performing Metis partitioning, we can put some constraint on the partitioning.
    Current, it supports two constrants to balance the partitioning. By default, Metis
    always tries to balance the number of nodes in each partition.

    * `balance_ntypes` balances the number of nodes of different types in each partition.
    * `balance_edges` balances the number of edges in each partition.

    To balance the node types, a user needs to pass a vector of N elements to indicate
    the type of each node. N is the number of nodes in the input graph.

1004
1005
1006
1007
1008
1009
1010
1011
    After the partition assignment, we construct partitions.

    Parameters
    ----------
    g : DGLGraph
        The graph to be partitioned
    k : int
        The number of partitions.
1012
1013
1014
1015
    balance_ntypes : tensor
        Node type of each node
    balance_edges : bool
        Indicate whether to balance the edges.
1016
1017
1018
1019
1020

    Returns
    -------
    a 1-D tensor
        A vector with each element that indicates the partition Id of a vertex.
1021
    '''
1022
1023
    # METIS works only on symmetric graphs.
    # The METIS runs on the symmetric graph to generate the node assignment to partitions.
1024
    sym_g = to_bidirected_stale(g, readonly=True)
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
    vwgt = []
    # To balance the node types in each partition, we can take advantage of the vertex weights
    # in Metis. When vertex weights are provided, Metis will tries to generate partitions with
    # balanced vertex weights. A vertex can be assigned with multiple weights. The vertex weights
    # are stored in a vector of N * w elements, where N is the number of vertices and w
    # is the number of weights per vertex. Metis tries to balance the first weight, and then
    # the second weight, and so on.
    # When balancing node types, we use the first weight to indicate the first node type.
    # if a node belongs to the first node type, its weight is set to 1; otherwise, 0.
    # Similary, we set the second weight for the second node type and so on. The number
    # of weights is the same as the number of node types.
    if balance_ntypes is not None:
        assert len(balance_ntypes) == g.number_of_nodes(), \
                "The length of balance_ntypes should be equal to #nodes in the graph"
1039
        balance_ntypes = F.tensor(balance_ntypes)
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
        uniq_ntypes = F.unique(balance_ntypes)
        for ntype in uniq_ntypes:
            vwgt.append(F.astype(balance_ntypes == ntype, F.int64))

    # When balancing edges in partitions, we use in-degree as one of the weights.
    if balance_edges:
        vwgt.append(F.astype(g.in_degrees(), F.int64))

    # The vertex weights have to be stored in a vector.
    if len(vwgt) > 0:
        vwgt = F.stack(vwgt, 1)
        shape = (np.prod(F.shape(vwgt),),)
        vwgt = F.reshape(vwgt, shape)
        vwgt = F.zerocopy_to_dgl_ndarray(vwgt)
    else:
        vwgt = F.zeros((0,), F.int64, F.cpu())
        vwgt = F.zerocopy_to_dgl_ndarray(vwgt)

    node_part = _CAPI_DGLMetisPartition(sym_g._graph, k, vwgt)
1059
1060
1061
1062
1063
1064
    if len(node_part) == 0:
        return None
    else:
        node_part = utils.toindex(node_part)
        return node_part.tousertensor()

1065
1066
def metis_partition(g, k, extra_cached_hops=0, reshuffle=False,
                    balance_ntypes=None, balance_edges=False):
1067
    ''' This is to partition a graph with Metis partitioning.
1068

Da Zheng's avatar
Da Zheng committed
1069
1070
1071
1072
    Metis assigns vertices to partitions. This API constructs subgraphs with the vertices assigned
    to the partitions and their incoming edges. A subgraph may contain HALO nodes which does
    not belong to the partition of a subgraph but are connected to the nodes
    in the partition within a fixed number of hops.
1073

1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
    When performing Metis partitioning, we can put some constraint on the partitioning.
    Current, it supports two constrants to balance the partitioning. By default, Metis
    always tries to balance the number of nodes in each partition.

    * `balance_ntypes` balances the number of nodes of different types in each partition.
    * `balance_edges` balances the number of edges in each partition.

    To balance the node types, a user needs to pass a vector of N elements to indicate
    the type of each node. N is the number of nodes in the input graph.

Da Zheng's avatar
Da Zheng committed
1084
1085
1086
1087
1088
1089
1090
1091
1092
    If `reshuffle` is turned on, the function reshuffles node Ids and edge Ids
    of the input graph before partitioning. After reshuffling, all nodes and edges
    in a partition fall in a contiguous Id range in the input graph.
    The partitioend subgraphs have node data 'orig_id', which stores the node Ids
    in the original input graph.

    The partitioned subgraph is stored in DGLGraph. The DGLGraph has the `part_id`
    node data that indicates the partition a node belongs to. The subgraphs do not contain
    the node/edge data in the input graph.
1093
1094
1095
1096
1097
1098
1099
1100
1101

    Parameters
    ------------
    g: DGLGraph
        The graph to be partitioned
    k: int
        The number of partitions.
    extra_cached_hops: int
        The number of hops a HALO node can be accessed.
Da Zheng's avatar
Da Zheng committed
1102
1103
    reshuffle : bool
        Resuffle nodes so that nodes in the same partition are in the same Id range.
1104
1105
1106
1107
    balance_ntypes : tensor
        Node type of each node
    balance_edges : bool
        Indicate whether to balance the edges.
Da Zheng's avatar
Da Zheng committed
1108

1109
1110
1111
1112
1113
    Returns
    --------
    a dict of DGLGraphs
        The key is the partition Id and the value is the DGLGraph of the partition.
    '''
1114
1115
    node_part = metis_partition_assignment(g, k, balance_ntypes, balance_edges)
    if node_part is None:
1116
1117
        return None

1118
    # Then we split the original graph into parts based on the METIS partitioning results.
Da Zheng's avatar
Da Zheng committed
1119
    return partition_graph_with_halo(g, node_part, extra_cached_hops, reshuffle)
1120

1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
def compact_graphs(graphs, always_preserve=None):
    """Given a list of graphs with the same set of nodes, find and eliminate the common
    isolated nodes across all graphs.

    This function requires the graphs to have the same set of nodes (i.e. the node types
    must be the same, and the number of nodes of each node type must be the same).  The
    metagraph does not have to be the same.

    It finds all the nodes that have zero in-degree and zero out-degree in all the given
    graphs, and eliminates them from all the graphs.

    Useful for graph sampling where we have a giant graph but we only wish to perform
    message passing on a smaller graph with a (tiny) subset of nodes.

    The node and edge features are not preserved.

    Parameters
    ----------
    graphs : DGLHeteroGraph or list[DGLHeteroGraph]
        The graph, or list of graphs
    always_preserve : Tensor or dict[str, Tensor], optional
        If a dict of node types and node ID tensors is given, the nodes of given
        node types would not be removed, regardless of whether they are isolated.
        If a Tensor is given, assume that all the graphs have one (same) node type.

    Returns
    -------
    DGLHeteroGraph or list[DGLHeteroGraph]
        The compacted graph or list of compacted graphs.

        Each returned graph would have a feature ``dgl.NID`` containing the mapping
        of node IDs for each type from the compacted graph(s) to the original graph(s).
        Note that the mapping is the same for all the compacted graphs.

    Bugs
    ----
    This function currently requires that the same node type of all graphs should have
    the same node type ID, i.e. the node types are *ordered* the same.

    Examples
    --------
    The following code constructs a bipartite graph with 20 users and 10 games, but
    only user #1 and #3, as well as game #3 and #5, have connections:

1165
    >>> g = dgl.bipartite([(1, 3), (3, 5)], 'user', 'plays', 'game', num_nodes=(20, 10))
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186

    The following would compact the graph above to another bipartite graph with only
    two users and two games.

    >>> new_g, induced_nodes = dgl.compact_graphs(g)
    >>> induced_nodes
    {'user': tensor([1, 3]), 'game': tensor([3, 5])}

    The mapping tells us that only user #1 and #3 as well as game #3 and #5 are kept.
    Furthermore, the first user and second user in the compacted graph maps to
    user #1 and #3 in the original graph.  Games are similar.

    One can verify that the edge connections are kept the same in the compacted graph.

    >>> new_g.edges(form='all', order='eid', etype='plays')
    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))

    When compacting multiple graphs, nodes that do not have any connections in any
    of the given graphs are removed.  So if we compact ``g`` and the following ``g2``
    graphs together:

1187
    >>> g2 = dgl.bipartite([(1, 6), (6, 8)], 'user', 'plays', 'game', num_nodes=(20, 10))
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
    >>> (new_g, new_g2), induced_nodes = dgl.compact_graphs([g, g2])
    >>> induced_nodes
    {'user': tensor([1, 3, 6]), 'game': tensor([3, 5, 6, 8])}

    Then one can see that user #1 from both graphs, users #3 from the first graph, as
    well as user #6 from the second graph, are kept.  Games are similar.

    Similarly, one can also verify the connections:

    >>> new_g.edges(form='all', order='eid', etype='plays')
    (tensor([0, 1]), tensor([0, 1]), tensor([0, 1]))
    >>> new_g2.edges(form='all', order='eid', etype='plays')
    (tensor([0, 2]), tensor([2, 3]), tensor([0, 1]))
    """
    return_single = False
    if not isinstance(graphs, Iterable):
        graphs = [graphs]
        return_single = True
    if len(graphs) == 0:
        return []

    # Ensure the node types are ordered the same.
    # TODO(BarclayII): we ideally need to remove this constraint.
    ntypes = graphs[0].ntypes
1212
    graph_dtype = graphs[0]._idtype_str
1213
    graph_ctx = graphs[0]._graph.ctx
1214
1215
1216
1217
    for g in graphs:
        assert ntypes == g.ntypes, \
            ("All graphs should have the same node types in the same order, got %s and %s" %
             ntypes, g.ntypes)
1218
1219
        assert graph_dtype == g._idtype_str, "Expect graph data type to be {}, but got {}".format(
            graph_dtype, g._idtype_str)
1220
1221
        assert graph_ctx == g._graph.ctx, "Expect graph device to be {}, but got {}".format(
            graph_ctx, g._graph.ctx)
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242

    # Process the dictionary or tensor of "always preserve" nodes
    if always_preserve is None:
        always_preserve = {}
    elif not isinstance(always_preserve, Mapping):
        if len(ntypes) > 1:
            raise ValueError("Node type must be given if multiple node types exist.")
        always_preserve = {ntypes[0]: always_preserve}

    always_preserve_nd = []
    for ntype in ntypes:
        nodes = always_preserve.get(ntype, None)
        if nodes is None:
            nodes = nd.empty([0], graph_dtype, graph_ctx)
        else:
            nodes = F.zerocopy_to_dgl_ndarray(nodes)
        always_preserve_nd.append(nodes)

    # Compact and construct heterographs
    new_graph_indexes, induced_nodes = _CAPI_DGLCompactGraphs(
        [g._graph for g in graphs], always_preserve_nd)
1243
    induced_nodes = [F.zerocopy_from_dgl_ndarray(nodes) for nodes in induced_nodes]
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255

    new_graphs = [
        DGLHeteroGraph(new_graph_index, graph.ntypes, graph.etypes)
        for new_graph_index, graph in zip(new_graph_indexes, graphs)]
    for g in new_graphs:
        for i, ntype in enumerate(graphs[0].ntypes):
            g.nodes[ntype].data[NID] = induced_nodes[i]
    if return_single:
        new_graphs = new_graphs[0]

    return new_graphs

1256
def to_block(g, dst_nodes=None, include_dst_in_src=True):
1257
1258
    """Convert a graph into a bipartite-structured "block" for message passing.

1259
1260
1261
    A block graph is uni-directional bipartite graph consisting of two sets of nodes
    SRC and DST. Each set can have many node types while all the edges are from SRC
    nodes to DST nodes.
1262

1263
1264
1265
1266
1267
1268
1269
1270
    Specifically, for each relation graph of canonical edge type ``(utype, etype, vtype)``,
    node type ``utype`` belongs to SRC while ``vtype`` belongs to DST.
    Edges from node type ``utype`` to node type ``vtype`` are preserved. If
    ``utype == vtype``, the result graph will have two node types of the same name ``utype``,
    but one belongs to SRC while the other belongs to DST. This is because although
    they have the same name, their node ids are relabeled differently (see below). In
    both cases, the canonical edge type in the new graph is still
    ``(utype, etype, vtype)``, so there is no difference when referring to it.
1271

1272
1273
    Moreover, the function also relabels node ids in each type to make the graph more compact.
    Specifically, the nodes of type ``vtype`` would contain the nodes that have at least one
1274
    inbound edge of any type, while ``utype`` would contain all the DST nodes of type ``vtype``,
1275
    as well as the nodes that have at least one outbound edge to any DST node.
1276

1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
    Since DST nodes are included in SRC nodes, a common requirement is to fetch
    the DST node features from the SRC nodes features. To avoid expensive sparse lookup,
    the function assures that the DST nodes in both SRC and DST sets have the same ids.
    As a result, given the node feature tensor ``X`` of type ``utype``,
    the following code finds the corresponding DST node features of type ``vtype``:

    .. code::

        X[:block.number_of_nodes('DST/vtype')]

    If the ``dst_nodes`` argument is given, the DST nodes would contain the given nodes.
    Otherwise, the DST nodes would be determined by DGL via the rules above.
1289
1290
1291
1292
1293

    Parameters
    ----------
    graph : DGLHeteroGraph
        The graph.
1294
1295
    dst_nodes : Tensor or dict[str, Tensor], optional
        Optional DST nodes. If a tensor is given, the graph must have only one node type.
1296
1297
    include_dst_in_src : bool, default True
        If False, do not include DST nodes in SRC nodes.
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310

    Returns
    -------
    DGLHeteroGraph
        The new graph describing the block.

        The node IDs induced for each type in both sides would be stored in feature
        ``dgl.NID``.

        The edge IDs induced for each type would be stored in feature ``dgl.EID``.

    Notes
    -----
1311
    This function is primarily for creating the structures for efficient
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
    computation of message passing.  See [TODO] for a detailed example.

    Examples
    --------
    Converting a homogeneous graph to a block as described above:
    >>> g = dgl.graph([(0, 1), (1, 2), (2, 3)])
    >>> block = dgl.to_block(g, torch.LongTensor([3, 2]))

    The right hand side nodes would be exactly the same as the ones given: [3, 2].
    >>> induced_dst = block.dstdata[dgl.NID]
    >>> induced_dst
    tensor([3, 2])

    The first few nodes of the left hand side nodes would also be exactly the same as
    the ones given.  The rest of the nodes are the ones necessary for message passing
    into nodes 3, 2.  This means that the node 1 would be included.
    >>> induced_src = block.srcdata[dgl.NID]
    >>> induced_src
    tensor([3, 2, 1])

    We can notice that the first two nodes are identical to the given nodes as well as
    the right hand side nodes.

    The induced edges can also be obtained by the following:
    >>> block.edata[dgl.EID]
    tensor([2, 1])

    This indicates that edge (2, 3) and (1, 2) are included in the result graph.  We can
    verify that the first edge in the block indeed maps to the edge (2, 3), and the
    second edge in the block indeed maps to the edge (1, 2):
    >>> src, dst = block.edges(order='eid')
    >>> induced_src[src], induced_dst[dst]
    (tensor([2, 1]), tensor([3, 2]))

    Converting a heterogeneous graph to a block is similar, except that when specifying
    the right hand side nodes, you have to give a dict:
    >>> g = dgl.bipartite([(0, 1), (1, 2), (2, 3)], utype='A', vtype='B')

1350
    If you don't specify any node of type A on the right hand side, the node type ``A``
1351
    in the block would have zero nodes on the DST side.
1352
    >>> block = dgl.to_block(g, {'B': torch.LongTensor([3, 2])})
1353
    >>> block.number_of_dst_nodes('A')
1354
    0
1355
    >>> block.number_of_dst_nodes('B')
1356
    2
1357
    >>> block.dstnodes['B'].data[dgl.NID]
1358
1359
1360
    tensor([3, 2])

    The left hand side would contain all the nodes on the right hand side:
1361
    >>> block.srcnodes['B'].data[dgl.NID]
1362
1363
1364
    tensor([3, 2])

    As well as all the nodes that have connections to the nodes on the right hand side:
1365
    >>> block.srcnodes['A'].data[dgl.NID]
1366
1367
    tensor([2, 1])
    """
1368
    if dst_nodes is None:
1369
        # Find all nodes that appeared as destinations
1370
        dst_nodes = defaultdict(list)
1371
1372
        for etype in g.canonical_etypes:
            _, dst = g.edges(etype=etype)
1373
1374
1375
1376
            dst_nodes[etype[2]].append(dst)
        dst_nodes = {ntype: F.unique(F.cat(values, 0)) for ntype, values in dst_nodes.items()}
    elif not isinstance(dst_nodes, Mapping):
        # dst_nodes is a Tensor, check if the g has only one type.
1377
1378
        if len(g.ntypes) > 1:
            raise ValueError(
1379
1380
                'Graph has more than one node type; please specify a dict for dst_nodes.')
        dst_nodes = {g.ntypes[0]: dst_nodes}
1381
1382
1383
    dst_nodes = {
        ntype: utils.toindex(nodes, g._idtype_str).tousertensor()
        for ntype, nodes in dst_nodes.items()}
1384

1385
1386
    # dst_nodes is now a dict
    dst_nodes_nd = []
1387
    for ntype in g.ntypes:
1388
        nodes = dst_nodes.get(ntype, None)
1389
        if nodes is not None:
1390
            dst_nodes_nd.append(F.zerocopy_to_dgl_ndarray(nodes))
1391
        else:
1392
            dst_nodes_nd.append(nd.NULL[g._idtype_str])
1393

1394
1395
    new_graph_index, src_nodes_nd, induced_edges_nd = _CAPI_DGLToBlock(
        g._graph, dst_nodes_nd, include_dst_in_src)
1396

1397
1398
    # The new graph duplicates the original node types to SRC and DST sets.
    new_ntypes = ([ntype for ntype in g.ntypes], [ntype for ntype in g.ntypes])
1399
    new_graph = DGLHeteroGraph(new_graph_index, new_ntypes, g.etypes)
1400
    assert new_graph.is_unibipartite  # sanity check
1401
1402

    for i, ntype in enumerate(g.ntypes):
1403
        new_graph.srcnodes[ntype].data[NID] = F.zerocopy_from_dgl_ndarray(src_nodes_nd[i])
1404
1405
1406
1407
        if ntype in dst_nodes:
            new_graph.dstnodes[ntype].data[NID] = dst_nodes[ntype]
        else:
            # For empty dst node sets, still create empty mapping arrays.
1408
            new_graph.dstnodes[ntype].data[NID] = F.tensor([], dtype=g.idtype)
1409
1410

    for i, canonical_etype in enumerate(g.canonical_etypes):
1411
        induced_edges = F.zerocopy_from_dgl_ndarray(induced_edges_nd[i])
1412
        utype, etype, vtype = canonical_etype
1413
        new_canonical_etype = (utype, etype, vtype)
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
        new_graph.edges[new_canonical_etype].data[EID] = induced_edges

    return new_graph

def remove_edges(g, edge_ids):
    """Return a new graph with given edge IDs removed.

    The nodes are preserved.

    Parameters
    ----------
    graph : DGLHeteroGraph
        The graph
    edge_ids : Tensor or dict[etypes, Tensor]
        The edge IDs for each edge type.

    Returns
    -------
    DGLHeteroGraph
        The new graph.
        The edge ID mapping from the new graph to the original graph is stored as
        ``dgl.EID`` on edge features.
    """
    if not isinstance(edge_ids, Mapping):
        if len(g.etypes) != 1:
            raise ValueError(
                "Graph has more than one edge type; specify a dict for edge_id instead.")
        edge_ids = {g.canonical_etypes[0]: edge_ids}

1443
    edge_ids_nd = [nd.NULL[g._idtype_str]] * len(g.etypes)
1444
    for key, value in edge_ids.items():
1445
1446
1447
1448
        if value.dtype != g.idtype:
            # if didn't check, this function still works, but returns wrong result
            raise utils.InconsistentDtypeException("Expect edge id tensors({}) to have \
         the same index type as graph({})".format(value.dtype, g.idtype))
1449
1450
1451
1452
1453
        edge_ids_nd[g.get_etype_id(key)] = F.zerocopy_to_dgl_ndarray(value)
    new_graph_index, induced_eids_nd = _CAPI_DGLRemoveEdges(g._graph, edge_ids_nd)

    new_graph = DGLHeteroGraph(new_graph_index, g.ntypes, g.etypes)
    for i, canonical_etype in enumerate(g.canonical_etypes):
1454
        data = induced_eids_nd[i]
1455
        if len(data) == 0:
1456
1457
1458
1459
            # Empty means that either
            # (1) no edges are removed and edges are not shuffled.
            # (2) all edges are removed.
            # The following statement deals with both cases.
1460
            new_graph.edges[canonical_etype].data[EID] = F.arange(
1461
                0, new_graph.number_of_edges(canonical_etype))
1462
1463
        else:
            new_graph.edges[canonical_etype].data[EID] = F.zerocopy_from_dgl_ndarray(data)
1464
1465
1466

    return new_graph

1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
def in_subgraph(g, nodes):
    """Extract the subgraph containing only the in edges of the given nodes.

    The subgraph keeps the same type schema and the cardinality of the original one.
    Node/edge features are not preserved. The original IDs
    the extracted edges are stored as the `dgl.EID` feature in the returned graph.

    Parameters
    ----------
    g : DGLHeteroGraph
        Full graph structure.
    nodes : tensor or dict
        Node ids to sample neighbors from. The allowed types
        are dictionary of node types to node id tensors, or simply node id tensor if
        the given graph g has only one type of nodes.

    Returns
    -------
    DGLHeteroGraph
        The subgraph.
    """
    if not isinstance(nodes, dict):
        if len(g.ntypes) > 1:
            raise DGLError("Must specify node type when the graph is not homogeneous.")
        nodes = {g.ntypes[0] : nodes}
    nodes_all_types = []
    for ntype in g.ntypes:
        if ntype in nodes:
1495
            nodes_all_types.append(utils.toindex(nodes[ntype], g._idtype_str).todgltensor())
1496
        else:
1497
            nodes_all_types.append(nd.NULL[g._idtype_str])
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533

    subgidx = _CAPI_DGLInSubgraph(g._graph, nodes_all_types)
    induced_edges = subgidx.induced_edges
    ret = DGLHeteroGraph(subgidx.graph, g.ntypes, g.etypes)
    for i, etype in enumerate(ret.canonical_etypes):
        ret.edges[etype].data[EID] = induced_edges[i].tousertensor()
    return ret

def out_subgraph(g, nodes):
    """Extract the subgraph containing only the out edges of the given nodes.

    The subgraph keeps the same type schema and the cardinality of the original one.
    Node/edge features are not preserved. The original IDs
    the extracted edges are stored as the `dgl.EID` feature in the returned graph.

    Parameters
    ----------
    g : DGLHeteroGraph
        Full graph structure.
    nodes : tensor or dict
        Node ids to sample neighbors from. The allowed types
        are dictionary of node types to node id tensors, or simply node id tensor if
        the given graph g has only one type of nodes.

    Returns
    -------
    DGLHeteroGraph
        The subgraph.
    """
    if not isinstance(nodes, dict):
        if len(g.ntypes) > 1:
            raise DGLError("Must specify node type when the graph is not homogeneous.")
        nodes = {g.ntypes[0] : nodes}
    nodes_all_types = []
    for ntype in g.ntypes:
        if ntype in nodes:
1534
            nodes_all_types.append(utils.toindex(nodes[ntype], g._idtype_str).todgltensor())
1535
        else:
1536
            nodes_all_types.append(nd.NULL[g._idtype_str])
1537
1538
1539
1540
1541
1542
1543
1544

    subgidx = _CAPI_DGLOutSubgraph(g._graph, nodes_all_types)
    induced_edges = subgidx.induced_edges
    ret = DGLHeteroGraph(subgidx.graph, g.ntypes, g.etypes)
    for i, etype in enumerate(ret.canonical_etypes):
        ret.edges[etype].data[EID] = induced_edges[i].tousertensor()
    return ret

1545
1546
def to_simple(g, return_counts='count', writeback_mapping=False, copy_ndata=True, copy_edata=False):
    r"""Convert a graph to a simple graph without duplicate edges.
1547

1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
    For a heterograph with multiple edge types, we
    treat edges corresponding
    to each type as a separate graph and convert each
    of them to a simple graph.

    When writeback_mapping=True, an extra mapping is returned.
    For the edges in the original graph,
    a writeback mapping is a tensor recording their new
    ids in the simple graph. If the graph has
    only one edge type, a single tensor is returned.
    If the graph has multiple edge types, a dictionary
    of tensor is returned using canonical edge types
    as the key.

    Given a :class:`dgl.DGLGraph` object, we return
    another :class:`dgl.DGLGraph` object representing the
    simple graph corresponding to it.
1565

1566

1567
1568
    Parameters
    ----------
1569
1570
    g : DGLGraph
        The input graph.
1571
    return_counts : str, optional
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
        If given, the count of each edge in the original graph
        will be stored as edge features under the name
        eturn_counts.
        (Default: "count")
    writeback_mapping: bool, optional
        If True, a write back mapping is returned for each edge
        type subgraph. If False, only the simple graph is returned.
        (Default: False)
    copy_ndata: bool, optional
        If True, the node features of the simple graph are copied
        from the original graph. If False, the simple
        graph will not have any node features.
        (Default: True)
    copy_edata: bool, optional
        If True, the edge features of the simple graph are copied
        from the original graph. If there exists duplicate edges between
        two nodes (u, v), the feature of the edge is randomly selected
        from one of the duplicate edges.
        If False, the simple graph will not have any edge features.
        (Default: False)
1592
1593
1594

    Returns
    -------
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
    DGLGraph
        A simple graph.
    tensor or dict of tensor
        If writeback_mapping is True, the writeback
        mapping is returned. If the graph has only
        one edge type, a tensor is returned. If the
        graph has multiple edge types, a dictionary
        of tensor is return.

    If ``copy_ndata`` is ``True``, same tensors will be used for
    the features of the original graph and the to_simpled graph. As a result, users
    should avoid performing in-place operations on the features of the to_simpled
    graph, which will corrupt the features of the original graph as well. For
    concrete examples, refer to the ``Examples`` section below.
1609
1610
1611

    Examples
    --------
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
    **Homographs or Heterographs with A Single Edge Type**

    Create a graph for demonstrating to_simple API.
    In the original graph, there are multiple edges between 1 and 2.

    >>> import dgl
    >>> import torch as th
    >>> g = dgl.graph((th.tensor([0, 1, 2, 1]), th.tensor([1, 2, 0, 2])))
    >>> g.ndata['h'] = th.tensor([[0.], [1.], [2.]])
    >>> g.edata['h'] = th.tensor([[3.], [4.], [5.], [6.]])

    Convert the graph to a simple graph. The return counts is
    stored in the edge feature 'cnt' and the writeback mapping
    is returned in a tensor.

    >>> sg, wm = dgl.to_simple(g, return_counts='cnt', writeback_mapping=True)
    >>> sg.ndata['h']
    tensor([[0.],
            [1.],
            [2.]])
    >>> u, v, eid = sg.edges(form='all')
    >>> u
    tensor([0, 1, 2])
    >>> v
    tensor([1, 2, 0])
    >>> eid
    tensor([0, 1, 2])
    >>> sg.edata['cnt']
    tensor([1, 2, 1])
    >>> wm
    tensor([0, 1, 2, 1])
    >>> 'h' in g.edata
    False

    **In-place operations on features of one graph will be reflected on features of
    the simple graph, which is dangerous. Out-place operations will not be reflected.**
1648

1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
    >>> sg.ndata['h'] += 1
    >>> g.ndata['h']
    tensor([[1.],
            [2.],
            [3.]])
    >>> g.ndata['h'] += 1
    >>> sg.ndata['h']
    tensor([[2.],
            [3.],
            [4.]])
    >>> sg.ndata['h2'] = th.ones(3, 1)
    >>> 'h2' in g.ndata
    False
1662

1663
    **Heterographs with Multiple Edge Types**
1664

1665
1666
1667
1668
1669
1670
    >>> g = dgl.heterograph({
    >>>     ('user', 'wins', 'user'): (th.tensor([0, 2, 0, 2, 2]), th.tensor([1, 1, 2, 1, 0])),
    >>>     ('user', 'plays', 'game'): (th.tensor([1, 2, 1]), th.tensor([2, 1, 1]))
    >>> })
    >>> g.nodes['game'].data['hv'] = th.ones(3, 1)
    >>> g.edges['plays'].data['he'] = th.zeros(3, 1)
1671

1672
1673
1674
1675
1676
    The to_simple operation is applied to the subgraph
    corresponding to ('user', 'wins', 'user') and the
    subgraph corresponding to ('user', 'plays', 'game').
    The return counts is stored in the default edge feature
    'count'.
1677

1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
    >>> sg, wm = dgl.to_simple(g, copy_ndata=False, writeback_mapping=True)
    >>> sg
    Graph(num_nodes={'game': 3, 'user': 3},
          num_edges={('user', 'wins', 'user'): 4, ('game', 'plays', 'user'): 3},
          metagraph=[('user', 'user'), ('game', 'user')])
    >>> sg.edges(etype='wins')
    (tensor([0, 2, 0, 2]), tensor([1, 1, 2, 0]))
    >>> wm[('user', 'wins', 'user')]
    tensor([0, 1, 2, 1, 3])
    >>> sg.edges(etype='plays')
    (tensor([2, 1, 1]), tensor([1, 2, 1]))
    >>> wm[('user', 'plays', 'game')]
    tensor([0, 1, 2])
    >>> 'hv' in sg.nodes['game'].data
    False
    >>> 'he' in sg.edges['plays'].data
    False
    >>> sg.edata['count']
    {('user', 'wins', 'user'): tensor([1, 2, 1, 1])
     ('user', 'plays', 'game'): tensor([1, 1, 1])}
1698
1699
1700
    """
    simple_graph_index, counts, edge_maps = _CAPI_DGLToSimpleHetero(g._graph)
    simple_graph = DGLHeteroGraph(simple_graph_index, g.ntypes, g.etypes)
1701
1702
    counts = [F.zerocopy_from_dgl_ndarray(count) for count in counts]
    edge_maps = [F.zerocopy_from_dgl_ndarray(edge_map) for edge_map in edge_maps]
1703
1704
1705
1706
1707

    if return_counts is not None:
        for count, canonical_etype in zip(counts, g.canonical_etypes):
            simple_graph.edges[canonical_etype].data[return_counts] = count

1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
    if copy_ndata:
        for ntype in g.ntypes:
            for key in g.nodes[ntype].data:
                simple_graph.nodes[ntype].data[key] = g.nodes[ntype].data[key]

    if copy_edata:
        for i, c_etype in enumerate(g.canonical_etypes):
            for key in g.edges[c_etype].data:
                feat_idx = F.asnumpy(edge_maps[i])
                _, indices = np.unique(feat_idx, return_index=True)
                simple_graph.edges[c_etype].data[key] = \
                    F.gather_row(g.edges[c_etype].data[key],
                                 F.copy_to(F.tensor(indices),
                                           F.context(g.edges[c_etype].data[key])))

    if writeback_mapping:
        # single edge type
        if len(edge_maps) == 1:
            return simple_graph, edge_maps[0]
        # multiple edge type
        else:
            wb_map = {}
            for edge_map, canonical_etype in zip(edge_maps, g.canonical_etypes):
                wb_map[canonical_etype] = edge_map
            return simple_graph, wb_map
1733
1734
1735

    return simple_graph

1736
1737
DGLHeteroGraph.to_simple = to_simple

1738
1739
1740
def as_heterograph(g, ntype='_U', etype='_E'):
    """Convert a DGLGraph to a DGLHeteroGraph with one node and edge type.

1741
    Node and edge features are preserved. Returns 64 bits graph
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783

    Parameters
    ----------
    g : DGLGraph
        The graph
    ntype : str, optional
        The node type name
    etype : str, optional
        The edge type name

    Returns
    -------
    DGLHeteroGraph
        The heterograph.
    """
    hgi = _CAPI_DGLAsHeteroGraph(g._graph)
    hg = DGLHeteroGraph(hgi, [ntype], [etype])
    hg.ndata.update(g.ndata)
    hg.edata.update(g.edata)
    return hg

def as_immutable_graph(hg):
    """Convert a DGLHeteroGraph with one node and edge type into a DGLGraph.

    Node and edge features are preserved.

    Parameters
    ----------
    g : DGLHeteroGraph
        The heterograph

    Returns
    -------
    DGLGraph
        The graph.
    """
    gidx = _CAPI_DGLAsImmutableGraph(hg._graph)
    g = DGLGraph(gidx)
    g.ndata.update(hg.ndata)
    g.edata.update(hg.edata)
    return g

1784
_init_api("dgl.transform")