"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "5b1af9ab8244484c5fc28a89fdbb1557106d897f"
convert.py 34.2 KB
Newer Older
Minjie Wang's avatar
Minjie Wang committed
1
2
3
4
5
6
7
8
9
10
11
"""Module for converting graph from/to other object."""
from collections import defaultdict
import numpy as np
import scipy as sp
import networkx as nx

from . import backend as F
from . import heterograph_index
from .heterograph import DGLHeteroGraph, combine_frames
from . import graph_index
from . import utils
Mufei Li's avatar
Mufei Li committed
12
from .base import NTYPE, ETYPE, NID, EID, DGLError
Minjie Wang's avatar
Minjie Wang committed
13
14
15
16
17

__all__ = [
    'graph',
    'bipartite',
    'hetero_from_relations',
18
    'heterograph',
Minjie Wang's avatar
Minjie Wang committed
19
20
21
22
23
24
    'to_hetero',
    'to_homo',
    'to_networkx',
]

def graph(data, ntype='_N', etype='_E', card=None, **kwargs):
Mufei Li's avatar
Mufei Li committed
25
    """Create a graph with one type of nodes and edges.
Minjie Wang's avatar
Minjie Wang committed
26
27
28
29
30

    In the sparse matrix perspective, :func:`dgl.graph` creates a graph
    whose adjacency matrix must be square while :func:`dgl.bipartite`
    creates a graph that does not necessarily have square adjacency matrix.

Mufei Li's avatar
Mufei Li committed
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
    Parameters
    ----------
    data : graph data
        Data to initialize graph structure. Supported data formats are

        (1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
        (2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...],  [2, 1, ...]))
        (3) scipy sparse matrix
        (4) networkx graph

    ntype : str, optional
        Node type name. (Default: _N)
    etype : str, optional
        Edge type name. (Default: _E)
    card : int, optional
        Cardinality (number of nodes in the graph). If None, infer from input data, i.e.
        the largest node ID plus 1. (Default: None)
    kwargs : key-word arguments, optional
        Other key word arguments. Only comes into effect when we are using a NetworkX
        graph. It can consist of:

        * edge_id_attr_name
            ``Str``, key name for edge ids in the NetworkX graph. If not found, we
            will consider the graph not to have pre-specified edge ids.
        * node_attrs
            ``List of str``, names for node features to retrieve from the NetworkX graph
        * edge_attrs
            ``List of str``, names for edge features to retrieve from the NetworkX graph

    Returns
    -------
    DGLHeteroGraph

Minjie Wang's avatar
Minjie Wang committed
64
65
    Examples
    --------
Mufei Li's avatar
Mufei Li committed
66
    Create from pairs of edges with form (src, dst)
Minjie Wang's avatar
Minjie Wang committed
67
68
69

    >>> g = dgl.graph([(0, 2), (0, 3), (1, 2)])

Mufei Li's avatar
Mufei Li committed
70
    Create from source and destination vertex ID lists
Minjie Wang's avatar
Minjie Wang committed
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130

    >>> u = [0, 0, 1]
    >>> v = [2, 3, 2]
    >>> g = dgl.graph((u, v))

    The IDs can also be stored in framework-specific tensors

    >>> import torch
    >>> u = torch.tensor([0, 0, 1])
    >>> v = torch.tensor([2, 3, 2])
    >>> g = dgl.graph((u, v))

    Create from scipy sparse matrix

    >>> from scipy.sparse import coo_matrix
    >>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
    >>> g = dgl.graph(spmat)

    Create from networkx graph

    >>> import networkx as nx
    >>> nxg = nx.path_graph(3)
    >>> g = dgl.graph(nxg)

    Specify node and edge type names

    >>> g = dgl.graph(..., 'user', 'follows')
    >>> g.ntypes
    ['user']
    >>> g.etypes
    ['follows']
    >>> g.canonical_etypes
    [('user', 'follows', 'user')]
    """
    if card is not None:
        urange, vrange = card, card
    else:
        urange, vrange = None, None
    if isinstance(data, tuple):
        u, v = data
        return create_from_edges(u, v, ntype, etype, ntype, urange, vrange)
    elif isinstance(data, list):
        return create_from_edge_list(data, ntype, etype, ntype, urange, vrange)
    elif isinstance(data, sp.sparse.spmatrix):
        return create_from_scipy(data, ntype, etype, ntype)
    elif isinstance(data, nx.Graph):
        return create_from_networkx(data, ntype, etype, **kwargs)
    else:
        raise DGLError('Unsupported graph data type:', type(data))

def bipartite(data, utype='_U', etype='_E', vtype='_V', card=None, **kwargs):
    """Create a bipartite graph.

    The result graph is directed and edges must be from ``utype`` nodes
    to ``vtype`` nodes. Nodes of each type have their own ID counts.

    In the sparse matrix perspective, :func:`dgl.graph` creates a graph
    whose adjacency matrix must be square while :func:`dgl.bipartite`
    creates a graph that does not necessarily have square adjacency matrix.

Mufei Li's avatar
Mufei Li committed
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
    Parameters
    ----------
    data : graph data
        Data to initialize graph structure. Supported data formats are

        (1) list of edge pairs (e.g. [(0, 2), (3, 1), ...])
        (2) pair of vertex IDs representing end nodes (e.g. ([0, 3, ...],  [2, 1, ...]))
        (3) scipy sparse matrix
        (4) networkx graph

    utype : str, optional
        Source node type name. (Default: _U)
    etype : str, optional
        Edge type name. (Default: _E)
    vtype : str, optional
        Destination node type name. (Default: _V)
    card : pair of int, optional
        Cardinality (number of nodes in the source and destination group). If None,
        infer from input data, i.e. the largest node ID plus 1 for each type. (Default: None)
    kwargs : key-word arguments, optional
        Other key word arguments. Only comes into effect when we are using a NetworkX
        graph. It can consist of:

        * edge_id_attr_name
            ``Str``, key name for edge ids in the NetworkX graph. If not found, we
            will consider the graph not to have pre-specified edge ids.

    Returns
    -------
    DGLHeteroGraph

Minjie Wang's avatar
Minjie Wang committed
162
163
    Examples
    --------
Mufei Li's avatar
Mufei Li committed
164
    Create from pairs of edges
Minjie Wang's avatar
Minjie Wang committed
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179

    >>> g = dgl.bipartite([(0, 2), (0, 3), (1, 2)], 'user', 'plays', 'game')
    >>> g.ntypes
    ['user', 'game']
    >>> g.etypes
    ['plays']
    >>> g.canonical_etypes
    [('user', 'plays', 'game')]
    >>> g.number_of_nodes('user')
    2
    >>> g.number_of_nodes('game')
    4
    >>> g.number_of_edges('plays')  # 'plays' could be omitted here
    3

Mufei Li's avatar
Mufei Li committed
180
    Create from source and destination vertex ID lists
Minjie Wang's avatar
Minjie Wang committed
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210

    >>> u = [0, 0, 1]
    >>> v = [2, 3, 2]
    >>> g = dgl.bipartite((u, v))

    The IDs can also be stored in framework-specific tensors

    >>> import torch
    >>> u = torch.tensor([0, 0, 1])
    >>> v = torch.tensor([2, 3, 2])
    >>> g = dgl.bipartite((u, v))

    Create from scipy sparse matrix. Since scipy sparse matrix has explicit
    shape, the cardinality of the result graph is derived from that.

    >>> from scipy.sparse import coo_matrix
    >>> spmat = coo_matrix(([1,1,1], ([0, 0, 1], [2, 3, 2])), shape=(4, 4))
    >>> g = dgl.bipartite(spmat, 'user', 'plays', 'game')
    >>> g.number_of_nodes('user')
    4
    >>> g.number_of_nodes('game')
    4

    Create from networkx graph. The given graph must follow the bipartite
    graph convention in networkx. Each node has a ``bipartite`` attribute
    with values 0 or 1. The result graph has two types of nodes and only
    edges from ``bipartite=0`` to ``bipartite=1`` will be included.

    >>> import networkx as nx
    >>> nxg = nx.complete_bipartite_graph(3, 4)
Mufei Li's avatar
Mufei Li committed
211
    >>> g = dgl.bipartite(nxg, 'user', 'plays', 'game')
Minjie Wang's avatar
Minjie Wang committed
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    >>> g.number_of_nodes('user')
    3
    >>> g.number_of_nodes('game')
    4
    >>> g.edges()
    (tensor([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]), tensor([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))
    """
    if utype == vtype:
        raise DGLError('utype should not be equal to vtype. Use ``dgl.graph`` instead.')
    if card is not None:
        urange, vrange = card
    else:
        urange, vrange = None, None
    if isinstance(data, tuple):
        u, v = data
        return create_from_edges(u, v, utype, etype, vtype, urange, vrange)
    elif isinstance(data, list):
        return create_from_edge_list(data, utype, etype, vtype, urange, vrange)
    elif isinstance(data, sp.sparse.spmatrix):
        return create_from_scipy(data, utype, etype, vtype)
    elif isinstance(data, nx.Graph):
        return create_from_networkx_bipartite(data, utype, etype, vtype, **kwargs)
    else:
        raise DGLError('Unsupported graph data type:', type(data))

def hetero_from_relations(rel_graphs):
    """Create a heterograph from per-relation graphs.

    Parameters
    ----------
    rel_graphs : list of DGLHeteroGraph
Mufei Li's avatar
Mufei Li committed
243
        Each element corresponds to a heterograph for one (src, edge, dst) relation.
Minjie Wang's avatar
Minjie Wang committed
244
245
246
247

    Returns
    -------
    DGLHeteroGraph
Mufei Li's avatar
Mufei Li committed
248
        A heterograph consisting of all relations.
Minjie Wang's avatar
Minjie Wang committed
249
    """
Mufei Li's avatar
Mufei Li committed
250
251
    # TODO(minjie): this API can be generalized as a union operation of the input graphs
    # TODO(minjie): handle node/edge data
Minjie Wang's avatar
Minjie Wang committed
252
253
254
255
256
257
258
259
    # infer meta graph
    ntype_dict = {}  # ntype -> ntid
    meta_edges = []
    ntypes = []
    etypes = []
    for rgrh in rel_graphs:
        assert len(rgrh.etypes) == 1
        stype, etype, dtype = rgrh.canonical_etypes[0]
Mufei Li's avatar
Mufei Li committed
260
        if stype not in ntype_dict:
Minjie Wang's avatar
Minjie Wang committed
261
262
263
            ntype_dict[stype] = len(ntypes)
            ntypes.append(stype)
        stid = ntype_dict[stype]
Mufei Li's avatar
Mufei Li committed
264
        if dtype not in ntype_dict:
Minjie Wang's avatar
Minjie Wang committed
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
            ntype_dict[dtype] = len(ntypes)
            ntypes.append(dtype)
        dtid = ntype_dict[dtype]
        meta_edges.append((stid, dtid))
        etypes.append(etype)
    metagraph = graph_index.from_edge_list(meta_edges, True, True)
    # create graph index
    hgidx = heterograph_index.create_heterograph_from_relations(
        metagraph, [rgrh._graph for rgrh in rel_graphs])
    retg = DGLHeteroGraph(hgidx, ntypes, etypes)
    for i, rgrh in enumerate(rel_graphs):
        for ntype in rgrh.ntypes:
            retg.nodes[ntype].data.update(rgrh.nodes[ntype].data)
        retg._edge_frames[i].update(rgrh._edge_frames[0])
    return retg

281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
def heterograph(data_dict, num_nodes_dict=None):
    """Create a heterogeneous graph from a dictionary between edge types and edge lists.

    Parameters
    ----------
    data_dict : dict
        The dictionary between edge types and edge list data.

        The edge types are specified as a triplet of (source node type name, edge type
        name, destination node type name).

        The edge list data can be anything acceptable by :func:`dgl.graph` or
        :func:`dgl.bipartite`, or objects returned by the two functions themselves.
    num_nodes_dict : dict[str, int]
        The number of nodes for each node type.

        By default DGL infers the number of nodes for each node type from ``data_dict``
        by taking the maximum node ID plus one for each node type.

    Returns
    -------
    DGLHeteroGraph

    Examples
    --------
    >>> g = dgl.heterograph({
    ...     ('user', 'follows', 'user'): [(0, 1), (1, 2)],
    ...     ('user', 'plays', 'game'): [(0, 0), (1, 0), (1, 1), (2, 1)],
    ...     ('developer', 'develops', 'game'): [(0, 0), (1, 1)],
    ...     })
    """
    rel_graphs = []

    # infer number of nodes for each node type
    if num_nodes_dict is None:
        num_nodes_dict = defaultdict(int)
        for (srctype, etype, dsttype), data in data_dict.items():
            if isinstance(data, tuple):
                nsrc = max(data[0]) + 1
                ndst = max(data[1]) + 1
            elif isinstance(data, list):
                src, dst = zip(*data)
                nsrc = max(src) + 1
                ndst = max(dst) + 1
            elif isinstance(data, sp.sparse.spmatrix):
                nsrc = data.shape[0]
                ndst = data.shape[1]
            elif isinstance(data, nx.Graph):
                if srctype == dsttype:
                    nsrc = ndst = data.number_of_nodes()
                else:
                    nsrc = len({n for n, d in data.nodes(data=True) if d['bipartite'] == 0})
                    ndst = data.number_of_nodes() - nsrc
            elif isinstance(data, DGLHeteroGraph):
                # Do nothing; handled in the next loop
336
                continue
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
            else:
                raise DGLError('Unsupported graph data type %s for %s' % (
                    type(data), (srctype, etype, dsttype)))
            if srctype == dsttype:
                ndst = nsrc = max(nsrc, ndst)

            num_nodes_dict[srctype] = max(num_nodes_dict[srctype], nsrc)
            num_nodes_dict[dsttype] = max(num_nodes_dict[dsttype], ndst)

    for (srctype, etype, dsttype), data in data_dict.items():
        if isinstance(data, DGLHeteroGraph):
            rel_graphs.append(data)
        elif srctype == dsttype:
            rel_graphs.append(graph(data, srctype, etype, card=num_nodes_dict[srctype]))
        else:
            rel_graphs.append(bipartite(
                data, srctype, etype, dsttype,
                card=(num_nodes_dict[srctype], num_nodes_dict[dsttype])))

    return hetero_from_relations(rel_graphs)

Minjie Wang's avatar
Minjie Wang committed
358
def to_hetero(G, ntypes, etypes, ntype_field=NTYPE, etype_field=ETYPE, metagraph=None):
Mufei Li's avatar
Mufei Li committed
359
    """Convert the given homogeneous graph to a heterogeneous graph.
Minjie Wang's avatar
Minjie Wang committed
360
361
362

    The input graph should have only one type of nodes and edges. Each node and edge
    stores an integer feature (under ``ntype_field`` and ``etype_field``), representing
Mufei Li's avatar
Mufei Li committed
363
    the type id, which can be used to retrieve the type names stored
Minjie Wang's avatar
Minjie Wang committed
364
365
    in the given ``ntypes`` and ``etypes`` arguments.

366
367
    The function will automatically distinguish edge types that have the same given
    type IDs but different src and dst type IDs. For example, we allow both edges A and B
Mufei Li's avatar
Mufei Li committed
368
369
    to have the same type ID 0, but one has (0, 1) and the other as (2, 3) as the
    (src, dst) type IDs. In this case, the function will "split" edge type 0 into two types:
370
371
372
    (0, ty_A, 1) and (2, ty_B, 3). In another word, these two edges share the same edge
    type name, but can be distinguished by a canonical edge type tuple.

Minjie Wang's avatar
Minjie Wang committed
373
374
375
    Parameters
    ----------
    G : DGLHeteroGraph
Mufei Li's avatar
Mufei Li committed
376
        Input homogeneous graph.
Minjie Wang's avatar
Minjie Wang committed
377
378
379
380
381
    ntypes : list of str
        The node type names.
    etypes : list of str
        The edge type names.
    ntype_field : str, optional
Mufei Li's avatar
Mufei Li committed
382
        The feature field used to store node type. (Default: ``dgl.NTYPE``)
Minjie Wang's avatar
Minjie Wang committed
383
    etype_field : str, optional
Mufei Li's avatar
Mufei Li committed
384
        The feature field used to store edge type. (Default: ``dgl.ETYPE``)
Minjie Wang's avatar
Minjie Wang committed
385
386
387
388
389
390
391
392
393
    metagraph : networkx MultiDiGraph, optional
        Metagraph of the returned heterograph.
        If provided, DGL assumes that G can indeed be described with the given metagraph.
        If None, DGL will infer the metagraph from the given inputs, which would be
        potentially slower for large graphs.

    Returns
    -------
    DGLHeteroGraph
Mufei Li's avatar
Mufei Li committed
394
395
        A heterograph. The parent node and edge ID are stored in the column
        ``dgl.NID`` and ``dgl.EID`` respectively for all node/edge types.
Minjie Wang's avatar
Minjie Wang committed
396
397
398
399
400
401
402
403

    Notes
    -----
    The returned node and edge types may not necessarily be in the same order as
    ``ntypes`` and ``etypes``.  And edge types may be duplicated if the source
    and destination types differ.

    The node IDs of a single type in the returned heterogeneous graph is ordered
Mufei Li's avatar
Mufei Li committed
404
    the same as the nodes with the same ``ntype_field`` feature. Edge IDs of
Minjie Wang's avatar
Minjie Wang committed
405
    a single type is similar.
Mufei Li's avatar
Mufei Li committed
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444

    Examples
    --------

    >>> g1 = dgl.bipartite([(0, 1), (1, 2)], 'user', 'develops', 'activity')
    >>> g2 = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    >>> hetero_g = dgl.hetero_from_relations([g1, g2])
    >>> print(hetero_g)
    Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
        num_edges={'develops': 2},
        metagraph=[('user', 'activity'), ('developer', 'game')])

    We first convert the heterogeneous graph to a homogeneous graph.

    >>> homo_g = dgl.to_homo(hetero_g)
    >>> print(homo_g)
    Graph(num_nodes=9, num_edges=4,
        ndata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
                       '_ID': Scheme(shape=(), dtype=torch.int64)}
        edata_schemes={'_TYPE': Scheme(shape=(), dtype=torch.int64),
                       '_ID': Scheme(shape=(), dtype=torch.int64)})
    >>> homo_g.ndata
    {'_TYPE': tensor([0, 0, 1, 1, 1, 2, 2, 3, 3]), '_ID': tensor([0, 1, 0, 1, 2, 0, 1, 0, 1])}
    Nodes 0, 1 for 'user', 2, 3, 4 for 'activity', 5, 6 for 'developer', 7, 8 for 'game'
    >>> homo_g.edata
    {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
    Edges 0, 1 for ('user', 'develops', 'activity'), 2, 3 for ('developer', 'develops', 'game')

    Now convert the homogeneous graph back to a heterogeneous graph.

    >>> hetero_g_2 = dgl.to_hetero(homo_g, hetero_g.ntypes, hetero_g.etypes)
    >>> print(hetero_g_2)
    Graph(num_nodes={'user': 2, 'activity': 3, 'developer': 2, 'game': 2},
        num_edges={'develops': 2},
        metagraph=[('user', 'activity'), ('developer', 'game')])

    See Also
    --------
    dgl.to_homo
Minjie Wang's avatar
Minjie Wang committed
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
    """
    # TODO(minjie): use hasattr to support DGLGraph input; should be fixed once
    #  DGLGraph is merged with DGLHeteroGraph
    if (hasattr(G, 'ntypes') and len(G.ntypes) > 1
            or hasattr(G, 'etypes') and len(G.etypes) > 1):
        raise DGLError('The input graph should be homogenous and have only one '
                       ' type of nodes and edges.')

    num_ntypes = len(ntypes)

    ntype_ids = F.asnumpy(G.ndata[ntype_field])
    etype_ids = F.asnumpy(G.edata[etype_field])

    # relabel nodes to per-type local IDs
    ntype_count = np.bincount(ntype_ids, minlength=num_ntypes)
    ntype_offset = np.insert(np.cumsum(ntype_count), 0, 0)
    ntype_ids_sortidx = np.argsort(ntype_ids)
    ntype_local_ids = np.zeros_like(ntype_ids)
    node_groups = []
    for i in range(num_ntypes):
        node_group = ntype_ids_sortidx[ntype_offset[i]:ntype_offset[i+1]]
        node_groups.append(node_group)
        ntype_local_ids[node_group] = np.arange(ntype_count[i])

    src, dst = G.all_edges(order='eid')
    src = F.asnumpy(src)
    dst = F.asnumpy(dst)
    src_local = ntype_local_ids[src]
    dst_local = ntype_local_ids[dst]
474
475
476
477
478
479
480
481
482
    # a 2D tensor of shape (E, 3). Each row represents the (stid, etid, dtid) tuple.
    edge_ctids = np.stack([ntype_ids[src], etype_ids, ntype_ids[dst]], 1)

    # infer metagraph and canonical edge types
    # No matter which branch it takes, the code will generate a 2D tensor of shape (E_m, 3),
    # E_m is the set of all possible canonical edge tuples. Each row represents the
    # (stid, dtid, dtid) tuple. We then compute a 2D tensor of shape (E, E_m) using the
    # above ``edge_ctids`` matrix. Each element i,j indicates whether the edge i is of the
    # canonical edge type j. We can then group the edges of the same type together.
Minjie Wang's avatar
Minjie Wang committed
483
484
    if metagraph is None:
        canonical_etids, _, etype_remapped = \
485
                utils.make_invmap(list(tuple(_) for _ in edge_ctids), False)
Minjie Wang's avatar
Minjie Wang committed
486
487
488
489
490
491
492
493
494
495
496
        etype_mask = (etype_remapped[None, :] == np.arange(len(canonical_etids))[:, None])
    else:
        ntypes_invmap = {nt: i for i, nt in enumerate(ntypes)}
        etypes_invmap = {et: i for i, et in enumerate(etypes)}
        canonical_etids = []
        for i, (srctype, dsttype, etype) in enumerate(metagraph.edges(keys=True)):
            srctype_id = ntypes_invmap[srctype]
            etype_id = etypes_invmap[etype]
            dsttype_id = ntypes_invmap[dsttype]
            canonical_etids.append((srctype_id, etype_id, dsttype_id))
        canonical_etids = np.array(canonical_etids)
497
        etype_mask = (edge_ctids[None, :] == canonical_etids[:, None]).all(2)
Minjie Wang's avatar
Minjie Wang committed
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
    edge_groups = [etype_mask[i].nonzero()[0] for i in range(len(canonical_etids))]

    rel_graphs = []
    for i, (stid, etid, dtid) in enumerate(canonical_etids):
        src_of_etype = src_local[edge_groups[i]]
        dst_of_etype = dst_local[edge_groups[i]]
        if stid == dtid:
            rel_graph = graph(
                (src_of_etype, dst_of_etype), ntypes[stid], etypes[etid],
                card=ntype_count[stid])
        else:
            rel_graph = bipartite(
                (src_of_etype, dst_of_etype), ntypes[stid], etypes[etid], ntypes[dtid],
                card=(ntype_count[stid], ntype_count[dtid]))
        rel_graphs.append(rel_graph)

    hg = hetero_from_relations(rel_graphs)

    ntype2ngrp = {ntype : node_groups[ntid] for ntid, ntype in enumerate(ntypes)}
    for ntid, ntype in enumerate(hg.ntypes):
        hg._node_frames[ntid][NID] = F.tensor(ntype2ngrp[ntype])

    for etid in range(len(hg.canonical_etypes)):
        hg._edge_frames[etid][EID] = F.tensor(edge_groups[etid])

    # features
    for key, data in G.ndata.items():
        for ntid, ntype in enumerate(hg.ntypes):
            rows = F.copy_to(F.tensor(ntype2ngrp[ntype]), F.context(data))
            hg._node_frames[ntid][key] = F.gather_row(data, rows)
    for key, data in G.edata.items():
        for etid in range(len(hg.canonical_etypes)):
            rows = F.copy_to(F.tensor(edge_groups[etid]), F.context(data))
            hg._edge_frames[etid][key] = F.gather_row(data, rows)

    return hg

def to_homo(G):
Mufei Li's avatar
Mufei Li committed
536
    """Convert the given heterogeneous graph to a homogeneous graph.
Minjie Wang's avatar
Minjie Wang committed
537

Mufei Li's avatar
Mufei Li committed
538
    The returned graph has only one type of nodes and edges.
Minjie Wang's avatar
Minjie Wang committed
539
540
541
542
543
544
545
546

    Node and edge types are stored as features in the returned graph. Each feature
    is an integer representing the type id, which can be used to retrieve the type
    names stored in ``G.ntypes`` and ``G.etypes`` arguments.

    Parameters
    ----------
    G : DGLHeteroGraph
Mufei Li's avatar
Mufei Li committed
547
        Input heterogeneous graph.
Minjie Wang's avatar
Minjie Wang committed
548
549
550
551

    Returns
    -------
    DGLHeteroGraph
Mufei Li's avatar
Mufei Li committed
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
        A homogeneous graph. The parent node and edge type/ID are stored in
        columns ``dgl.NTYPE/dgl.NID`` and ``dgl.ETYPE/dgl.EID`` respectively.

    Examples
    --------

    >>> follows_g = dgl.graph([(0, 1), (1, 2)], 'user', 'follows')
    >>> devs_g = dgl.bipartite([(0, 0), (1, 1)], 'developer', 'develops', 'game')
    >>> hetero_g = dgl.hetero_from_relations([follows_g, devs_g])
    >>> homo_g = dgl.to_homo(hetero_g)
    >>> homo_g.ndata
    {'_TYPE': tensor([0, 0, 0, 1, 1, 2, 2]), '_ID': tensor([0, 1, 2, 0, 1, 0, 1])}
    First three nodes for 'user', next two for 'developer' and the last two for 'game'
    >>> homo_g.edata
    {'_TYPE': tensor([0, 0, 1, 1]), '_ID': tensor([0, 1, 0, 1])}
    First two edges for 'follows', next two for 'develops'

    See Also
    --------
    dgl.to_hetero
Minjie Wang's avatar
Minjie Wang committed
572
573
574
575
576
577
578
579
580
    """
    num_nodes_per_ntype = [G.number_of_nodes(ntype) for ntype in G.ntypes]
    offset_per_ntype = np.insert(np.cumsum(num_nodes_per_ntype), 0, 0)
    srcs = []
    dsts = []
    etype_ids = []
    eids = []
    ntype_ids = []
    nids = []
581
    total_num_nodes = 0
Minjie Wang's avatar
Minjie Wang committed
582
583
584

    for ntype_id, ntype in enumerate(G.ntypes):
        num_nodes = G.number_of_nodes(ntype)
585
        total_num_nodes += num_nodes
Minjie Wang's avatar
Minjie Wang committed
586
587
588
589
590
591
592
593
594
595
596
597
        ntype_ids.append(F.full_1d(num_nodes, ntype_id, F.int64, F.cpu()))
        nids.append(F.arange(0, num_nodes))

    for etype_id, etype in enumerate(G.canonical_etypes):
        srctype, _, dsttype = etype
        src, dst = G.all_edges(etype=etype, order='eid')
        num_edges = len(src)
        srcs.append(src + offset_per_ntype[G.get_ntype_id(srctype)])
        dsts.append(dst + offset_per_ntype[G.get_ntype_id(dsttype)])
        etype_ids.append(F.full_1d(num_edges, etype_id, F.int64, F.cpu()))
        eids.append(F.arange(0, num_edges))

598
    retg = graph((F.cat(srcs, 0), F.cat(dsts, 0)), card=total_num_nodes)
Minjie Wang's avatar
Minjie Wang committed
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
    retg.ndata[NTYPE] = F.cat(ntype_ids, 0)
    retg.ndata[NID] = F.cat(nids, 0)
    retg.edata[ETYPE] = F.cat(etype_ids, 0)
    retg.edata[EID] = F.cat(eids, 0)

    # features
    comb_nf = combine_frames(G._node_frames, range(len(G.ntypes)))
    comb_ef = combine_frames(G._edge_frames, range(len(G.etypes)))
    if comb_nf is not None:
        retg.ndata.update(comb_nf)
    if comb_ef is not None:
        retg.edata.update(comb_ef)

    return retg

############################################################
# Internal APIs
############################################################

def create_from_edges(u, v, utype, etype, vtype, urange=None, vrange=None):
    """Internal function to create a graph from incident nodes with types.

    utype could be equal to vtype

    Parameters
    ----------
    u : iterable of int
        List of source node IDs.
    v : iterable of int
        List of destination node IDs.
    utype : str
        Source node type name.
    etype : str
        Edge type name.
    vtype : str
        Destination node type name.
    urange : int, optional
        The source node ID range. If None, the value is the maximum
        of the source node IDs in the edge list plus 1. (Default: None)
    vrange : int, optional
        The destination node ID range. If None, the value is the
        maximum of the destination node IDs in the edge list plus 1. (Default: None)

    Returns
Mufei Li's avatar
Mufei Li committed
643
    -------
Minjie Wang's avatar
Minjie Wang committed
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
    DGLHeteroGraph
    """
    u = utils.toindex(u)
    v = utils.toindex(v)
    urange = urange or (int(F.asnumpy(F.max(u.tousertensor(), dim=0))) + 1)
    vrange = vrange or (int(F.asnumpy(F.max(v.tousertensor(), dim=0))) + 1)
    if utype == vtype:
        urange = vrange = max(urange, vrange)
        num_ntypes = 1
    else:
        num_ntypes = 2
    hgidx = heterograph_index.create_unitgraph_from_coo(num_ntypes, urange, vrange, u, v)
    if utype == vtype:
        return DGLHeteroGraph(hgidx, [utype], [etype])
    else:
        return DGLHeteroGraph(hgidx, [utype, vtype], [etype])

def create_from_edge_list(elist, utype, etype, vtype, urange=None, vrange=None):
Mufei Li's avatar
Mufei Li committed
662
    """Internal function to create a heterograph from a list of edge tuples with types.
Minjie Wang's avatar
Minjie Wang committed
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695

    utype could be equal to vtype

    Parameters
    ----------
    elist : iterable of int pairs
        List of (src, dst) node ID pairs.
    utype : str
        Source node type name.
    etype : str
        Edge type name.
    vtype : str
        Destination node type name.
    urange : int, optional
        The source node ID range. If None, the value is the maximum
        of the source node IDs in the edge list plus 1. (Default: None)
    vrange : int, optional
        The destination node ID range. If None, the value is the
        maximum of the destination node IDs in the edge list plus 1. (Default: None)

    Returns
    -------
    DGLHeteroGraph
    """
    if len(elist) == 0:
        u, v = [], []
    else:
        u, v = zip(*elist)
        u = list(u)
        v = list(v)
    return create_from_edges(u, v, utype, etype, vtype, urange, vrange)

def create_from_scipy(spmat, utype, etype, vtype, with_edge_id=False):
Mufei Li's avatar
Mufei Li committed
696
    """Internal function to create a heterograph from a scipy sparse matrix with types.
Minjie Wang's avatar
Minjie Wang committed
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742

    Parameters
    ----------
    spmat : scipy.sparse.spmatrix
        The adjacency matrix whose rows represent sources and columns
        represent destinations.
    utype : str
        Source node type name.
    etype : str
        Edge type name.
    vtype : str
        Destination node type name.
    with_edge_id : bool
        If True, the entries in the sparse matrix are treated as edge IDs.
        Otherwise, the entries are ignored and edges will be added in
        (source, destination) order.

    Returns
    -------
    DGLHeteroGraph
    """
    num_src, num_dst = spmat.shape
    num_ntypes = 1 if utype == vtype else 2
    if spmat.getformat() == 'coo':
        row = utils.toindex(spmat.row)
        col = utils.toindex(spmat.col)
        hgidx = heterograph_index.create_unitgraph_from_coo(
            num_ntypes, num_src, num_dst, row, col)
    else:
        spmat = spmat.tocsr()
        indptr = utils.toindex(spmat.indptr)
        indices = utils.toindex(spmat.indices)
        # TODO(minjie): with_edge_id is only reasonable for csr matrix. How to fix?
        data = utils.toindex(spmat.data if with_edge_id else list(range(len(indices))))
        hgidx = heterograph_index.create_unitgraph_from_csr(
            num_ntypes, num_src, num_dst, indptr, indices, data)
    if num_ntypes == 1:
        return DGLHeteroGraph(hgidx, [utype], [etype])
    else:
        return DGLHeteroGraph(hgidx, [utype, vtype], [etype])

def create_from_networkx(nx_graph,
                         ntype, etype,
                         edge_id_attr_name='id',
                         node_attrs=None,
                         edge_attrs=None):
Mufei Li's avatar
Mufei Li committed
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
    """Create a heterograph that has only one set of nodes and edges.

    Parameters
    ----------
    nx_graph : NetworkX graph
    ntype : str
        Type name for both source and destination nodes
    etype : str
        Type name for edges
    edge_id_attr_name : str, optional
        Key name for edge ids in the NetworkX graph. If not found, we
        will consider the graph not to have pre-specified edge ids. (Default: 'id')
    node_attrs : list of str
        Names for node features to retrieve from the NetworkX graph (Default: None)
    edge_attrs : list of str
        Names for edge features to retrieve from the NetworkX graph (Default: None)

    Returns
    -------
    g : DGLHeteroGraph
Minjie Wang's avatar
Minjie Wang committed
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
    """
    if not nx_graph.is_directed():
        nx_graph = nx_graph.to_directed()

    # Relabel nodes using consecutive integers
    nx_graph = nx.convert_node_labels_to_integers(nx_graph, ordering='sorted')

    # nx_graph.edges(data=True) returns src, dst, attr_dict
    if nx_graph.number_of_edges() > 0:
        has_edge_id = edge_id_attr_name in next(iter(nx_graph.edges(data=True)))[-1]
    else:
        has_edge_id = False

    if has_edge_id:
        num_edges = nx_graph.number_of_edges()
        src = np.zeros((num_edges,), dtype=np.int64)
        dst = np.zeros((num_edges,), dtype=np.int64)
        for u, v, attr in nx_graph.edges(data=True):
            eid = attr[edge_id_attr_name]
            src[eid] = u
            dst[eid] = v
    else:
        src = []
        dst = []
        for e in nx_graph.edges:
            src.append(e[0])
            dst.append(e[1])
    src = utils.toindex(src)
    dst = utils.toindex(dst)
    num_nodes = nx_graph.number_of_nodes()
    g = create_from_edges(src, dst, ntype, etype, ntype, num_nodes, num_nodes)

    # handle features
    # copy attributes
    def _batcher(lst):
        if F.is_tensor(lst[0]):
            return F.cat([F.unsqueeze(x, 0) for x in lst], dim=0)
        else:
            return F.tensor(lst)
    if node_attrs is not None:
        # mapping from feature name to a list of tensors to be concatenated
        attr_dict = defaultdict(list)
        for nid in range(g.number_of_nodes()):
            for attr in node_attrs:
                attr_dict[attr].append(nx_graph.nodes[nid][attr])
        for attr in node_attrs:
            g.ndata[attr] = _batcher(attr_dict[attr])

    if edge_attrs is not None:
        # mapping from feature name to a list of tensors to be concatenated
        attr_dict = defaultdict(lambda: [None] * g.number_of_edges())
        # each defaultdict value is initialized to be a list of None
        # None here serves as placeholder to be replaced by feature with
        # corresponding edge id
        if has_edge_id:
            num_edges = g.number_of_edges()
            for _, _, attrs in nx_graph.edges(data=True):
                if attrs[edge_id_attr_name] >= num_edges:
                    raise DGLError('Expect the pre-specified edge ids to be'
                                   ' smaller than the number of edges --'
                                   ' {}, got {}.'.format(num_edges, attrs['id']))
                for key in edge_attrs:
                    attr_dict[key][attrs['id']] = attrs[key]
        else:
            # XXX: assuming networkx iteration order is deterministic
            #      so the order is the same as graph_index.from_networkx
            for eid, (_, _, attrs) in enumerate(nx_graph.edges(data=True)):
                for key in edge_attrs:
                    attr_dict[key][eid] = attrs[key]
        for attr in edge_attrs:
            for val in attr_dict[attr]:
                if val is None:
                    raise DGLError('Not all edges have attribute {}.'.format(attr))
            g.edata[attr] = _batcher(attr_dict[attr])

    return g

def create_from_networkx_bipartite(nx_graph,
                                   utype, etype, vtype,
                                   edge_id_attr_name='id',
                                   node_attrs=None,
                                   edge_attrs=None):
Mufei Li's avatar
Mufei Li committed
845
846
    """Create a heterograph that has one set of source nodes, one set of
    destination nodes and one set of edges.
Minjie Wang's avatar
Minjie Wang committed
847

Mufei Li's avatar
Mufei Li committed
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
    Parameters
    ----------
    nx_graph : NetworkX graph
        The input graph must follow the bipartite graph convention of networkx.
        Each node has an attribute ``bipartite`` with values 0 and 1 indicating
        which set it belongs to. Only edges from node set 0 to node set 1 are
        added to the returned graph.
    utype : str
        Source node type name.
    etype : str
        Edge type name.
    vtype : str
        Destination node type name.
    edge_id_attr_name : str, optional
        Key name for edge ids in the NetworkX graph. If not found, we
        will consider the graph not to have pre-specified edge ids. (Default: 'id')
    node_attrs : list of str
        Names for node features to retrieve from the NetworkX graph (Default: None)
    edge_attrs : list of str
        Names for edge features to retrieve from the NetworkX graph (Default: None)
Minjie Wang's avatar
Minjie Wang committed
868

Mufei Li's avatar
Mufei Li committed
869
870
871
    Returns
    -------
    g : DGLHeteroGraph
Minjie Wang's avatar
Minjie Wang committed
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
    """
    if not nx_graph.is_directed():
        nx_graph = nx_graph.to_directed()

    top_nodes = {n for n, d in nx_graph.nodes(data=True) if d['bipartite'] == 0}
    bottom_nodes = set(nx_graph) - top_nodes
    top_nodes = sorted(top_nodes)
    bottom_nodes = sorted(bottom_nodes)
    top_map = {n : i for i, n in enumerate(top_nodes)}
    bottom_map = {n : i for i, n in enumerate(bottom_nodes)}

    if nx_graph.number_of_edges() > 0:
        has_edge_id = edge_id_attr_name in next(iter(nx_graph.edges(data=True)))[-1]
    else:
        has_edge_id = False

    if has_edge_id:
        num_edges = nx_graph.number_of_edges()
        src = np.zeros((num_edges,), dtype=np.int64)
        dst = np.zeros((num_edges,), dtype=np.int64)
        for u, v, attr in nx_graph.edges(data=True):
            eid = attr[edge_id_attr_name]
            src[eid] = top_map[u]
            dst[eid] = bottom_map[v]
    else:
        src = []
        dst = []
        for e in nx_graph.edges:
            if e[0] in top_map:
                src.append(top_map[e[0]])
                dst.append(bottom_map[e[1]])
    src = utils.toindex(src)
    dst = utils.toindex(dst)
    g = create_from_edges(src, dst, utype, etype, vtype, len(top_nodes), len(bottom_nodes))

    # TODO attributes
Mufei Li's avatar
Mufei Li committed
908
909
    assert node_attrs is None, 'Retrieval of node attributes are not supported yet.'
    assert edge_attrs is None, 'Retrieval of edge attributes are not supported yet.'
Minjie Wang's avatar
Minjie Wang committed
910
911
912
913
914
    return g

def to_networkx(g, node_attrs=None, edge_attrs=None):
    """Convert to networkx graph.

Mufei Li's avatar
Mufei Li committed
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
    The edge id will be saved as the 'id' edge attribute.

    Parameters
    ----------
    g : DGLGraph or DGLHeteroGraph
        For DGLHeteroGraphs, we currently only support the
        case of one node type and one edge type.
    node_attrs : iterable of str, optional
        The node attributes to be copied. (Default: None)
    edge_attrs : iterable of str, optional
        The edge attributes to be copied. (Default: None)

    Returns
    -------
    networkx.DiGraph
        The nx graph
Minjie Wang's avatar
Minjie Wang committed
931
932
    """
    return g.to_networkx(node_attrs, edge_attrs)