OpenDAS / dgl / Commits / 836fbb00

Commit 836fbb00 (unverified), authored Dec 12, 2023 by Rhett Ying, committed via GitHub Dec 12, 2023

[GraphBolt] move to_dgl() from datapipe to dataloader iter (#6728)
Parent: b20455a2

Showing 6 changed files with 36 additions and 28 deletions (+36 -28)
docs/source/guide/minibatch-custom-sampler.rst (+2 -2)
docs/source/guide/minibatch-edge.rst (+15 -14)
docs/source/guide/minibatch-inference.rst (+1 -1)
docs/source/guide/minibatch-link.rst (+4 -2)
docs/source/guide/minibatch-node.rst (+14 -8)
docs/source/guide/minibatch-parallelism.rst (+0 -1)
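All six files receive the same mechanical change: the to_dgl() conversion is dropped as a datapipe stage and applied to each yielded MiniBatch inside the training loop instead. A minimal before/after sketch of the pattern, reusing the guide's own names (g, feature, train_set, device, model):

    import dgl.graphbolt as gb

    datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True)
    datapipe = datapipe.sample_neighbor(g, [10, 10])  # 2 layers.
    datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
    # Before this commit, conversion was a pipeline stage:
    #     datapipe = datapipe.to_dgl()
    datapipe = datapipe.copy_to(device)
    dataloader = gb.DataLoader(datapipe, num_workers=0)

    for data in dataloader:
        # After this commit, each MiniBatch is converted per iteration.
        data = data.to_dgl()
        input_features = data.node_features["feat"]
        output_predictions = model(data.blocks, input_features)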
docs/source/guide/minibatch-custom-sampler.rst
...
...
@@ -47,11 +47,11 @@ To use this sampler with :class:`~dgl.graphbolt.DataLoader`:
     datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True)
     datapipe = datapipe.customized_sample_neighbor(g, [10, 10])  # 2 layers.
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)

     for data in dataloader:
+        data = data.to_dgl()
         input_features = data.node_features["feat"]
         output_labels = data.labels
         output_predictions = model(data.blocks, input_features)
...
...
@@ -93,11 +93,11 @@ can be used on heterogeneous graphs:
     datapipe = datapipe.fetch_feature(
         feature, node_feature_keys={"user": ["feat"], "item": ["feat"]}
     )
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)

     for data in dataloader:
+        data = data.to_dgl()
         input_features = {
             ntype: data.node_features[(ntype, "feat")]
             for ntype in data.blocks[0].srctypes
...
...
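The dict comprehension above is cut off by the diff context. A plausible completion of the heterogeneous loop, assuming the same model call as the homogeneous example; the closing lines are an inference from context, not part of the diff:

    for data in dataloader:
        data = data.to_dgl()
        # One feature tensor per source node type, keyed as fetch_feature stored them.
        input_features = {
            ntype: data.node_features[(ntype, "feat")]
            for ntype in data.blocks[0].srctypes
        }
        output_labels = data.labels
        output_predictions = model(data.blocks, input_features)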
docs/source/guide/minibatch-edge.rst
...
...
@@ -38,16 +38,18 @@ edges(namely, node pairs) in the training set instead of the nodes.
     # Or equivalently:
     # datapipe = gb.NeighborSampler(datapipe, g, [10, 10])
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)

-Iterating over the DataLoader will yield :class:`~dgl.graphbolt.DGLMiniBatch`
+Iterating over the DataLoader will yield :class:`~dgl.graphbolt.MiniBatch`
 which contains a list of specially created graphs representing the computation
-dependencies on each layer. They are called *message flow graphs* (MFGs) in DGL.
+dependencies on each layer. In order to train with DGL, you need to convert
+them to :class:`~dgl.graphbolt.DGLMiniBatch`. Then you can access the
+*message flow graphs* (MFGs).
+
+.. code:: python
+
+    mini_batch = next(iter(dataloader))
+    mini_batch = mini_batch.to_dgl()
+    print(mini_batch.blocks)

 .. note::
...
...
@@ -91,7 +93,6 @@ You can use :func:`~dgl.graphbolt.exclude_seed_edges` alongside with
     exclude_seed_edges = partial(gb.exclude_seed_edges, include_reverse_edges=True)
     datapipe = datapipe.transform(exclude_seed_edges)
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
@@ -181,6 +182,7 @@ their incident node representations.
     opt = torch.optim.Adam(model.parameters())

     for data in dataloader:
+        data = data.to_dgl()
         blocks = data.blocks
         x = data.edge_features("feat")
         y_hat = model(data.blocks, x, data.positive_node_pairs)
...
...
@@ -273,7 +275,6 @@ only difference is that the train_set is now an instance of
     datapipe = datapipe.fetch_feature(
         feature, node_feature_keys={"item": ["feat"], "user": ["feat"]}
     )
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
@@ -310,17 +311,17 @@ dictionaries of node types and predictions here.
 .. code:: python

     import torch.nn.functional as F

     model = Model(in_features, hidden_features, out_features, num_classes, etypes)
-    model = model.cuda()
+    model = model.to(device)
     opt = torch.optim.Adam(model.parameters())

-    for input_nodes, edge_subgraph, blocks in dataloader:
-        blocks = [b.to(torch.device('cuda')) for b in blocks]
-        edge_subgraph = edge_subgraph.to(torch.device('cuda'))
-        input_features = blocks[0].srcdata['features']
-        edge_labels = edge_subgraph.edata['labels']
-        edge_predictions = model(edge_subgraph, blocks, input_features)
-        loss = compute_loss(edge_labels, edge_predictions)
+    for data in dataloader:
+        data = data.to_dgl()
+        blocks = data.blocks
+        x = data.edge_features(("user:like:item", "feat"))
+        y_hat = model(data.blocks, x, data.positive_node_pairs)
+        loss = F.cross_entropy(data.labels, y_hat)
         opt.zero_grad()
         loss.backward()
         opt.step()
...
...
docs/source/guide/minibatch-inference.rst
...
...
@@ -47,7 +47,6 @@ only one layer at a time.
     datapipe = gb.ItemSampler(all_nodes_set, batch_size=1024, shuffle=True)
     datapipe = datapipe.sample_neighbor(g, [-1])  # 1 layer.
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
@@ -100,6 +99,7 @@ and combined as well.
     feature = feature.to(device)

     for step, data in tqdm(enumerate(dataloader)):
+        data = data.to_dgl()
         x = feature[data.input_nodes]
         hidden_x = layer(data.blocks[0], x)  # len(blocks) = 1
         if not is_last_layer:
...
...
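For orientation, here is a sketch of how the layer-wise inference loop above typically continues. The outer loop over layers, the output buffer y, the output_nodes attribute used for the write-back, and the F.relu activation are illustrative assumptions; only the inner lines come from the diff:

    import torch.nn.functional as F
    from tqdm import tqdm

    feature = feature.to(device)
    for layer_idx, layer in enumerate(layers):
        is_last_layer = layer_idx == len(layers) - 1
        for step, data in tqdm(enumerate(dataloader)):
            data = data.to_dgl()
            x = feature[data.input_nodes]
            hidden_x = layer(data.blocks[0], x)  # len(blocks) = 1
            if not is_last_layer:
                hidden_x = F.relu(hidden_x)
            # Collect this batch's rows; they become the next layer's input.
            y[data.output_nodes] = hidden_x.cpu()
        feature = y.to(device)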
docs/source/guide/minibatch-link.rst
...
...
@@ -27,7 +27,6 @@ The whole data loader pipeline is as follows:
     datapipe = datapipe.sample_neighbor(g, [10, 10])  # 2 layers.
     datapipe = datapipe.transform(gb.exclude_seed_edges)
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
@@ -130,6 +129,8 @@ above.
     total_loss = 0
     start_epoch_time = time.time()
     for step, data in enumerate(dataloader):
+        # Convert MiniBatch to DGLMiniBatch.
+        data = data.to_dgl()
         # Unpack MiniBatch.
         compacted_pairs, labels = to_binary_link_dgl_computing_pack(data)
         node_feature = data.node_features["feat"]
...
...
@@ -213,7 +214,6 @@ only difference is that you need to give edge types for feature fetching.
         feature,
         node_feature_keys={"user": ["feat"], "item": ["feat"]}
     )
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
@@ -273,6 +273,8 @@ except for computing loss on specific edge type.
     total_loss = 0
     start_epoch_time = time.time()
     for step, data in enumerate(dataloader):
+        # Convert MiniBatch to DGLMiniBatch.
+        data = data.to_dgl()
         # Unpack MiniBatch.
         compacted_pairs, labels = to_binary_link_dgl_computing_pack(data, category)
         node_features = {
...
...
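To show where the conversion now sits in a complete step, here is a minimal sketch of one homogeneous link-prediction iteration. It assumes model, opt, and the guide's helper to_binary_link_dgl_computing_pack are defined as elsewhere in this guide; the model signature and the logits-based loss are assumptions consistent with the binary labels:

    import torch.nn.functional as F

    for step, data in enumerate(dataloader):
        # Convert MiniBatch to DGLMiniBatch before any DGL-specific access.
        data = data.to_dgl()
        # Guide-defined helper: compacted node pairs plus 0/1 labels.
        compacted_pairs, labels = to_binary_link_dgl_computing_pack(data)
        node_feature = data.node_features["feat"]
        y_hat = model(data.blocks, node_feature, compacted_pairs)  # assumed signature
        loss = F.binary_cross_entropy_with_logits(y_hat, labels)
        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item()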
docs/source/guide/minibatch-node.rst
...
...
@@ -50,18 +50,20 @@ putting the list of generated MFGs onto GPU.
     # Or equivalently:
     # datapipe = gb.NeighborSampler(datapipe, g, [10, 10])
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)

-Iterating over the DataLoader will yield :class:`~dgl.graphbolt.DGLMiniBatch`
+Iterating over the DataLoader will yield :class:`~dgl.graphbolt.MiniBatch`
 which contains a list of specially created graphs representing the computation
-dependencies on each layer. They are called *message flow graphs* (MFGs) in DGL.
+dependencies on each layer. In order to train with DGL, you need to convert
+them to :class:`~dgl.graphbolt.DGLMiniBatch`. Then you could access the
+*message flow graphs* (MFGs).
+
+.. code:: python
+
+    mini_batch = next(iter(dataloader))
+    mini_batch = mini_batch.to_dgl()
+    print(mini_batch.blocks)
...
...
@@ -128,17 +130,20 @@ Training Loop
 The training loop simply consists of iterating over the dataset with the
 customized batching iterator. During each iteration that yields
-:class:`~dgl.graphbolt.DGLMiniBatch`, we:
+:class:`~dgl.graphbolt.MiniBatch`, we:

-1. Access the node features corresponding to the input nodes via
+1. Convert the :class:`~dgl.graphbolt.MiniBatch` to
+   :class:`~dgl.graphbolt.DGLMiniBatch`.
+2. Access the node features corresponding to the input nodes via
    ``data.node_features["feat"]``. These features are already moved to the
    target device (CPU or GPU) by the data loader.
-2. Access the node labels corresponding to the output nodes via
+3. Access the node labels corresponding to the output nodes via
    ``data.labels``. These labels are already moved to the target device (CPU
    or GPU) by the data loader.
-3. Feed the list of MFGs and the input node features to the multilayer
+4. Feed the list of MFGs and the input node features to the multilayer
    GNN and get the outputs.
-4. Compute the loss and backpropagate.
+5. Compute the loss and backpropagate.
...
...
@@ -150,6 +155,7 @@ customized batching iterator. During each iteration that yields
     opt = torch.optim.Adam(model.parameters())

     for data in dataloader:
+        data = data.to_dgl()
         input_features = data.node_features["feat"]
         output_labels = data.labels
         output_predictions = model(data.blocks, input_features)
...
...
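Mapping the five numbered steps onto code, here is a sketch of the full loop under the new convention; the diff elides the loss and optimizer-step lines, so the cross-entropy loss below is an assumption:

    import torch
    import torch.nn.functional as F

    opt = torch.optim.Adam(model.parameters())

    for data in dataloader:
        data = data.to_dgl()                           # 1. convert to DGLMiniBatch
        input_features = data.node_features["feat"]    # 2. input-node features
        output_labels = data.labels                    # 3. output-node labels
        output_predictions = model(data.blocks, input_features)  # 4. forward pass
        loss = F.cross_entropy(output_predictions, output_labels)  # 5. loss (assumed)
        opt.zero_grad()
        loss.backward()
        opt.step()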
@@ -215,7 +221,6 @@ of node types to node IDs.
     datapipe = datapipe.fetch_feature(
         feature, node_feature_keys={"author": ["feat"], "paper": ["feat"]}
     )
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
@@ -230,6 +235,7 @@ dictionaries of node types and predictions here.
     opt = torch.optim.Adam(model.parameters())

     for data in dataloader:
+        data = data.to_dgl()
         # For heterogeneous graphs, we need to specify the node types and
         # feature name when accessing the node features. So do the labels.
         input_features = {
...
...
docs/source/guide/minibatch-parallelism.rst
...
...
@@ -21,7 +21,6 @@ generate a minibatch, including:
     datapipe = datapipe.sample_neighbor(g, [10, 10])  # 2 layers.
     datapipe = datapipe.transform(gb.exclude_seed_edges)
     datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
-    datapipe = datapipe.to_dgl()
     datapipe = datapipe.copy_to(device)
     dataloader = gb.DataLoader(datapipe, num_workers=0)
...
...
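A practical note for this section: with to_dgl() out of the datapipe, the stages that build a MiniBatch can run ahead of the training step, while the DGL conversion happens per batch in the training process. A sketch of the same pipeline with worker parallelism enabled (num_workers=4 and the leading ItemSampler stage are illustrative, not from the diff):

    import dgl.graphbolt as gb

    datapipe = gb.ItemSampler(train_set, batch_size=1024, shuffle=True)
    datapipe = datapipe.sample_neighbor(g, [10, 10])  # 2 layers.
    datapipe = datapipe.transform(gb.exclude_seed_edges)
    datapipe = datapipe.fetch_feature(feature, node_feature_keys=["feat"])
    datapipe = datapipe.copy_to(device)
    dataloader = gb.DataLoader(datapipe, num_workers=4)

    for data in dataloader:
        data = data.to_dgl()  # per-batch conversion in the main process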