OpenDAS / dgl · Commits

Unverified commit ba2ca4be, authored Nov 20, 2023 by xiangyuzhi, committed by GitHub on Nov 20, 2023.

[Sparse] Add a LADIES example (#6560)

parent 76ce14b7

Showing 1 changed file with 256 additions and 0 deletions.

examples/sparse/sampling/ladies.py (new file, mode 100644, +256 −0)
"""
This script demonstrates how to use dgl sparse library to sample on graph and
train model. It trains and tests a LADIES model using the sparse power and
sp_broadcast_v operators to sample submatrix from the whole matrix.
This flowchart describes the main functional sequence of the provided example.
main
│
├───> Load and preprocess full dataset
│
├───> Instantiate LADIES model
│
├───> train
│ │
│ └───> Training loop
│ │
│ ├───> Sample submatrix
│ │
│ └───> LADIES.forward
└───> test
│
├───> Sample submatrix
│
└───> Evaluate the model
"""
import argparse

import dgl.sparse as dglsp
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchmetrics.functional as MF
from dgl.data import AsNodePredDataset
from dgl.sparse import sp_broadcast_v
from ogb.nodeproppred import DglNodePropPredDataset
class SAGEConv(nn.Module):
    r"""LADIES layer from `Layer-Dependent Importance Sampling
    for Training Deep and Large Graph Convolutional Networks
    <https://arxiv.org/abs/1911.07323.pdf>`__"""

    def __init__(
        self,
        in_size,
        out_size,
    ):
        super(SAGEConv, self).__init__()
        self._in_src_feats, self._in_dst_feats = in_size, in_size
        self._out_size = out_size

        self.fc_neigh = nn.Linear(self._in_src_feats, out_size, bias=False)
        self.fc_self = nn.Linear(self._in_dst_feats, out_size, bias=True)
        self.reset_parameters()

    def reset_parameters(self):
        gain = nn.init.calculate_gain("relu")
        nn.init.xavier_uniform_(self.fc_self.weight, gain=gain)
        nn.init.xavier_uniform_(self.fc_neigh.weight, gain=gain)

    def forward(self, A, feat):
        feat_src = feat
        feat_dst = feat[: A.shape[1]]
        # Aggregator type: mean.
        srcdata = self.fc_neigh(feat_src)
        # Divided by degree.
        D_hat = dglsp.diag(A.sum(0)) ** -1
        A_div = A @ D_hat
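        # D_hat holds the reciprocal column sums of A, so A @ D_hat rescales
        # each column of A to sum to one; the transposed product below then
        # computes a weighted mean of the sampled neighbor features for each
        # destination node.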
        # Conv neighbors.
        dstdata = A_div.T @ srcdata

        rst = self.fc_self(feat_dst) + dstdata
        return rst
class LADIES(nn.Module):
    def __init__(self, in_size, hid_size, out_size):
        super().__init__()
        self.layers = nn.ModuleList()
        # Three-layer LADIES.
        self.layers.append(SAGEConv(in_size, hid_size))
        self.layers.append(SAGEConv(hid_size, hid_size))
        self.layers.append(SAGEConv(hid_size, out_size))
        self.dropout = nn.Dropout(0.5)
        self.hid_size = hid_size
        self.out_size = out_size

    def forward(self, sampled_matrices, x):
        hidden_x = x
        for layer_idx, (layer, sampled_matrix) in enumerate(
            zip(self.layers, sampled_matrices)
        ):
            hidden_x = layer(sampled_matrix, hidden_x)
            if layer_idx != len(self.layers) - 1:
                hidden_x = F.relu(hidden_x)
                hidden_x = self.dropout(hidden_x)
        return hidden_x
def multilayer_sample(A, fanouts, seeds, ndata):
    sampled_matrices = []
    src = seeds

    #########################################################################
    # (HIGHLIGHT) Use the sparse sampling operators to perform the LADIES
    # sampling algorithm on the neighboring nodes of the seed nodes.
    # The sparse power operator is applied to compute sampling probabilities,
    # and sp_broadcast_v is then employed to normalize edge weights by
    # performing division along the columns.
    #########################################################################
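    # For intuition: sp_broadcast_v(M, v, op) applies `op` between a sparse
    # matrix M and a dense vector v, broadcasting v across M's nonzero entries
    # row-wise or column-wise depending on v's shape. Dividing by M.sum(0), as
    # done below, should make every retained column sum to one (a reading of
    # the code here, not an authoritative API description).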
    for fanout in fanouts:
        # Sample neighbors.
        sub_A = A.index_select(1, src)
        # Compute probability weight.
        row_probs = (sub_A**2).sum(1)
        row_probs = row_probs / row_probs.sum(0)
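        # Per the LADIES paper, the sampling probability of candidate row i is
        # proportional to the squared L2 norm of A[i, src], i.e. its edge
        # weights restricted to the current destination columns.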
        # Layer-wise sample nodes.
        row_ids = torch.multinomial(row_probs, fanout, replacement=False)
        # Add self-loop.
        row_ids = torch.cat((row_ids, src), 0).unique()
        sampled_matrix = sub_A.index_select(0, row_ids)
        # Normalize edge weights.
        div_matrix = sp_broadcast_v(
            sampled_matrix, row_probs[row_ids].reshape(-1, 1), "truediv"
        )
        div_matrix = sp_broadcast_v(div_matrix, div_matrix.sum(0), "truediv")
        # Save the sampled matrix.
        sampled_matrices.insert(0, div_matrix)
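        # Inserting at index 0 reverses the order: sampled_matrices[0] ends up
        # as the deepest, input-side hop, which lines up with the first layer
        # consumed by LADIES.forward.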
        src = row_ids

    x = ndata["feat"][src]
    y = ndata["label"][seeds]
    return sampled_matrices, x, y
def evaluate(model, A, dataloader, ndata, num_classes):
    model.eval()
    ys = []
    y_hats = []
    fanouts = [4000, 4000, 4000]
    for seeds in dataloader:
        with torch.no_grad():
            sampled_matrices, x, y = multilayer_sample(
                A, fanouts, seeds, ndata
            )
            ys.append(y)
            y_hats.append(model(sampled_matrices, x))
    return MF.accuracy(
        torch.cat(y_hats),
        torch.cat(ys),
        task="multiclass",
        num_classes=num_classes,
    )
def validate(device, A, ndata, dataset, model, batch_size):
    inf_id = dataset.test_idx.to(device)
    inf_dataloader = torch.utils.data.DataLoader(inf_id, batch_size=batch_size)
    acc = evaluate(model, A, inf_dataloader, ndata, dataset.num_classes)
    return acc
def train(device, A, ndata, dataset, model):
    # Create sampler & dataloader.
    train_idx = dataset.train_idx.to(device)
    val_idx = dataset.val_idx.to(device)
    train_dataloader = torch.utils.data.DataLoader(
        train_idx, batch_size=1024, shuffle=True
    )
    val_dataloader = torch.utils.data.DataLoader(val_idx, batch_size=1024)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-3, weight_decay=5e-4
    )
    fanouts = [4000, 4000, 4000]

    for epoch in range(20):
        model.train()
        total_loss = 0
        for it, seeds in enumerate(train_dataloader):
            sampled_matrices, x, y = multilayer_sample(
                A, fanouts, seeds, ndata
            )
            y_hat = model(sampled_matrices, x)
            loss = F.cross_entropy(y_hat, y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        acc = evaluate(model, A, val_dataloader, ndata, dataset.num_classes)
        print(
            "Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
                epoch, total_loss / (it + 1), acc.item()
            )
        )
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="LADIESConv")
    parser.add_argument(
        "--mode",
        default="gpu",
        choices=["cpu", "gpu"],
        help="Training mode. 'cpu' for CPU training, 'gpu' for GPU training.",
    )
    args = parser.parse_args()
    if not torch.cuda.is_available():
        args.mode = "cpu"
    print(f"Training in {args.mode} mode.")

    #####################################################################
    # (HIGHLIGHT) This example implements the LADIES algorithm with
    # sparse operators, which involves sampling a subgraph from the full
    # graph and training on it.
    #
    # First, the whole graph is loaded onto the CPU or GPU and
    # transformed into a sparse matrix. To obtain the training subgraph,
    # three submatrices are sampled from the seed nodes, containing their
    # layer-wise sampled 1-hop, 2-hop, and 3-hop neighbors. Then, the
    # features of the subgraph are fed to the network for training.
    #####################################################################
    # Load and preprocess dataset.
    print("Loading data")
    device = torch.device("cpu" if args.mode == "cpu" else "cuda")
    dataset = AsNodePredDataset(DglNodePropPredDataset("ogbn-products"))
    g = dataset[0]

    # Create LADIES model.
    in_size = g.ndata["feat"].shape[1]
    out_size = dataset.num_classes
    model = LADIES(in_size, 256, out_size).to(device)

    # Create the sparse adjacency matrix.
    indices = torch.stack(g.edges())
    N = g.num_nodes()
    A = dglsp.spmatrix(indices, shape=(N, N)).coalesce()
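    # coalesce() removes duplicate (row, col) indices so each edge appears
    # only once in the sparse matrix.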
    I = dglsp.identity(A.shape)

    # Initialize the normalized adjacency matrix.
    A_hat = A + I
    D_hat = dglsp.diag(A_hat.sum(1)) ** -0.5
    A_norm = D_hat @ A_hat @ D_hat
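    # This is the GCN-style symmetric normalization
    # A_norm = D^(-1/2) @ (A + I) @ D^(-1/2), where D is the diagonal degree
    # matrix of the self-loop-augmented adjacency A + I.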
    A_norm = A_norm.to(device)
    g = g.to(device)

    # Model training.
    print("Training...")
    train(device, A_norm, g.ndata, dataset, model)

    # Test the model.
    print("Testing...")
    acc = validate(device, A_norm, g.ndata, dataset, model, batch_size=2048)
    print(f"Test accuracy {acc:.4f}")
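To run the example (assuming an environment with a DGL build that includes the dgl.sparse module, plus torch, torchmetrics, and ogb installed), invoke it as `python examples/sparse/sampling/ladies.py --mode cpu`; `--mode gpu` is the default and falls back to CPU automatically when CUDA is unavailable.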