2_dglgraph.py 7 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
"""
How Does DGL Represent A Graph?
===============================

By the end of this tutorial you will be able to:

-  Construct a graph in DGL from scratch.
-  Assign node and edge features to a graph.
-  Query properties of a DGL graph such as node degrees and
   connectivity.
-  Transform a DGL graph into another graph.
-  Load and save DGL graphs.

(Time estimate: 16 minutes)

"""


######################################################################
# DGL Graph Construction
# ----------------------
# 
# DGL represents a directed graph as a ``DGLGraph`` object. You can
# construct a graph by specifying the number of nodes in the graph as well
# as the list of source and destination nodes.  Nodes in the graph have
# consecutive IDs starting from 0.
# 
# For instance, the following code constructs a directed star graph with 5
# leaves. The center node's ID is 0. The edges go from the
# center node to the leaves.
# 

import dgl
import numpy as np
import torch

g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)
# Equivalently, PyTorch LongTensors also work.
g = dgl.graph((torch.LongTensor([0, 0, 0, 0, 0]), torch.LongTensor([1, 2, 3, 4, 5])), num_nodes=6)

# You can omit the number of nodes argument if you can tell the number of nodes from the edge list alone.
g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))


######################################################################
# Edges in the graph have consecutive IDs starting from 0, and are
# in the same order as the list of source and destination nodes during
# creation.
# 

# Print the source and destination nodes of every edge.
print(g.edges())


######################################################################
# .. note::
# 
#    ``DGLGraph``'s are always directed to best fit the computation
#    pattern of graph neural networks, where the messages sent
#    from one node to the other are often different between both
#    directions. If you want to handle undirected graphs, you may consider
#    treating it as a bidirectional graph. See `Graph
#    Transformations`_ for an example of making
#    a bidirectional graph.
# 


######################################################################
# Assigning Node and Edge Features to Graph
# -----------------------------------------
# 
# Many graph data contain attributes on nodes and edges.
# Although the types of node and edge attributes can be arbitrary in real
# world, ``DGLGraph`` only accepts attributes stored in tensors (with
# numerical contents). Consequently, an attribute of all the nodes or
# edges must have the same shape. In the context of deep learning, those
# attributes are often called *features*.
# 
# You can assign and retrieve node and edge features via ``ndata`` and
# ``edata`` interface.
# 

# Assign a 3-dimensional node feature vector for each node.
g.ndata['x'] = torch.randn(6, 3)
# Assign a 4-dimensional edge feature vector for each edge.
g.edata['a'] = torch.randn(5, 4)
# Assign a 5x4 node feature matrix for each node.  Node and edge features in DGL can be multi-dimensional.
g.ndata['y'] = torch.randn(6, 5, 4)

print(g.edata['a'])


######################################################################
# .. note::
# 
#    The vast development of deep learning has provided us many
#    ways to encode various types of attributes into numerical features.
#    Here are some general suggestions:
# 
#    -  For categorical attributes (e.g. gender, occupation), consider
#       converting them to integers or one-hot encoding.
#    -  For variable length string contents (e.g. news article, quote),
#       consider applying a language model.
#    -  For images, consider applying a vision model such as CNNs.
# 
#    You can find plenty of materials on how to encode such attributes
#    into a tensor in the `PyTorch Deep Learning
#    Tutorials <https://pytorch.org/tutorials/>`__.
# 


######################################################################
# Querying Graph Structures
# -------------------------
# 
# ``DGLGraph`` object provides various methods to query a graph structure.
# 

print(g.num_nodes())
print(g.num_edges())
# Out degrees of the center node
print(g.out_degrees(0))
# In degrees of the center node - note that the graph is directed so the in degree should be 0.
print(g.in_degrees(0))


######################################################################
# Graph Transformations
# ---------------------
# 


######################################################################
# DGL provides many APIs to transform a graph to another such as
# extracting a subgraph:
# 

# Induce a subgraph from node 0, node 1 and node 3 from the original graph.
sg1 = g.subgraph([0, 1, 3])
# Induce a subgraph from edge 0, edge 1 and edge 3 from the original graph.
sg2 = g.edge_subgraph([0, 1, 3])


######################################################################
# You can obtain the node/edge mapping from the subgraph to the original
# graph by looking into the node feature ``dgl.NID`` or edge feature
# ``dgl.EID`` in the new graph.
# 

# The original IDs of each node in sg1
print(sg1.ndata[dgl.NID])
# The original IDs of each edge in sg1
print(sg1.edata[dgl.EID])
# The original IDs of each node in sg2
print(sg2.ndata[dgl.NID])
# The original IDs of each edge in sg2
print(sg2.edata[dgl.EID])


######################################################################
# ``subgraph`` and ``edge_subgraph`` also copies the original features
# to the subgraph:
#

# The original node feature of each node in sg1
print(sg1.ndata['x'])
# The original edge feature of each node in sg1
print(sg1.edata['a'])
# The original node feature of each node in sg2
print(sg2.ndata['x'])
# The original edge feature of each node in sg2
print(sg2.edata['a'])


######################################################################
# Another common transformation is to add a reverse edge for each edge in
# the original graph with ``dgl.add_reverse_edges``.
# 
# .. note::
# 
#    If you have an undirected graph, it is better to convert it
#    into a bidirectional graph first via adding reverse edges.
# 

newg = dgl.add_reverse_edges(g)
newg.edges()


######################################################################
# Loading and Saving Graphs
# -------------------------
# 
# You can save a graph or a list of graphs via ``dgl.save_graphs`` and
# load them back with ``dgl.load_graphs``.
# 

# Save graphs
dgl.save_graphs('graph.dgl', g)
dgl.save_graphs('graphs.dgl', [g, sg1, sg2])

# Load graphs
(g,), _ = dgl.load_graphs('graph.dgl')
print(g)
(g, sg1, sg2), _ = dgl.load_graphs('graphs.dgl')
print(g)
print(sg1)
print(sg2)


######################################################################
# What’s next?
# ------------
# 
# -  See
#    :ref:`here <apigraph-querying-graph-structure>`
#    for a list of graph structure query APIs.
# -  See
#    :ref:`here <api-subgraph-extraction>`
#    for a list of subgraph extraction routines.
# -  See
#    :ref:`here <api-transform>`
#    for a list of graph transformation routines.
# -  API reference of :func:`dgl.save_graphs`
#    and
#    :func:`dgl.load_graphs`
#