OpenDAS / dgl

Commit 6a3cb548 (unverified)
Authored Oct 19, 2022 by Hongzhi (Steve), Chen; committed by GitHub on Oct 19, 2022

black (#4722)

Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>

Parent: c78ddee2
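This commit applies the Black code formatter to the tutorial; every hunk below is mechanical reformatting with no behavior change. A minimal sketch of reproducing it locally (assumptions: Black is installed and run from the repository root; output can vary with the Black version and any line-length setting the project pins):

    pip install black
    black tutorials/large/L2_large_link_prediction.py

    # Check formatting without rewriting files; exits non-zero if anything would change:
    black --check --diff tutorials/large/L2_large_link_prediction.py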
1 changed file with 78 additions and 41 deletions

tutorials/large/L2_large_link_prediction.py (+78, -41)
@@ -57,8 +57,8 @@ import torch
 import numpy as np
 from ogb.nodeproppred import DglNodePropPredDataset

-dataset = DglNodePropPredDataset('ogbn-arxiv')
-device = 'cpu'      # change to 'cuda' for GPU
+dataset = DglNodePropPredDataset("ogbn-arxiv")
+device = "cpu"  # change to 'cuda' for GPU

 graph, node_labels = dataset[0]
 # Add reverse edges since ogbn-arxiv is unidirectional.
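Most of this diff comes from Black's string normalization: literals are rewritten to double quotes unless switching would force escaping. A small sketch of the rule (the second literal is a hypothetical example, not from this file):

    name = "ogbn-arxiv"  # 'ogbn-arxiv' is rewritten to double quotes
    quip = 'say "hi"'    # kept as-is: double quotes would need escaping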
@@ -66,16 +66,16 @@ graph = dgl.add_reverse_edges(graph)
 print(graph)
 print(node_labels)

-node_features = graph.ndata['feat']
+node_features = graph.ndata["feat"]
 node_labels = node_labels[:, 0]
 num_features = node_features.shape[1]
 num_classes = (node_labels.max() + 1).item()
-print('Number of classes:', num_classes)
+print("Number of classes:", num_classes)

 idx_split = dataset.get_idx_split()
-train_nids = idx_split['train']
-valid_nids = idx_split['valid']
-test_nids = idx_split['test']
+train_nids = idx_split["train"]
+valid_nids = idx_split["valid"]
+test_nids = idx_split["test"]


 ######################################################################
@@ -112,7 +112,8 @@ negative_sampler = dgl.dataloading.negative_sampler.Uniform(5)
 sampler = dgl.dataloading.NeighborSampler([4, 4])
 sampler = dgl.dataloading.as_edge_prediction_sampler(
-    sampler, negative_sampler=negative_sampler)
+    sampler, negative_sampler=negative_sampler
+)
 train_dataloader = dgl.dataloading.DataLoader(
     # The following arguments are specific to DataLoader.
     graph,  # The graph
@@ -123,7 +124,7 @@ train_dataloader = dgl.dataloading.DataLoader(
     batch_size=1024,  # Batch size
     shuffle=True,  # Whether to shuffle the nodes for every epoch
     drop_last=False,  # Whether to drop the last incomplete batch
-    num_workers=0  # Number of sampler processes
+    num_workers=0,  # Number of sampler processes
 )
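The comma added after num_workers=0 follows Black's trailing-comma rule: whenever a call stays split across lines, the last argument gets a trailing comma, and a pre-existing ("magic") trailing comma in turn forces Black to keep the call exploded one argument per line. A sketch with a hypothetical function f:

    f(a, b)  # fits on one line, so Black collapses it

    f(
        a,
        b,  # magic trailing comma: Black keeps this multi-line form
    )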
@@ -133,9 +134,19 @@ train_dataloader = dgl.dataloading.DataLoader(
 #
 input_nodes, pos_graph, neg_graph, mfgs = next(iter(train_dataloader))
-print('Number of input nodes:', len(input_nodes))
-print('Positive graph # nodes:', pos_graph.number_of_nodes(), '# edges:', pos_graph.number_of_edges())
-print('Negative graph # nodes:', neg_graph.number_of_nodes(), '# edges:', neg_graph.number_of_edges())
+print("Number of input nodes:", len(input_nodes))
+print(
+    "Positive graph # nodes:",
+    pos_graph.number_of_nodes(),
+    "# edges:",
+    pos_graph.number_of_edges(),
+)
+print(
+    "Negative graph # nodes:",
+    neg_graph.number_of_nodes(),
+    "# edges:",
+    neg_graph.number_of_edges(),
+)
 print(mfgs)
@@ -174,21 +185,23 @@ import torch.nn as nn
 import torch.nn.functional as F
 from dgl.nn import SAGEConv

+
 class Model(nn.Module):
     def __init__(self, in_feats, h_feats):
         super(Model, self).__init__()
-        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type='mean')
-        self.conv2 = SAGEConv(h_feats, h_feats, aggregator_type='mean')
+        self.conv1 = SAGEConv(in_feats, h_feats, aggregator_type="mean")
+        self.conv2 = SAGEConv(h_feats, h_feats, aggregator_type="mean")
         self.h_feats = h_feats

     def forward(self, mfgs, x):
-        h_dst = x[:mfgs[0].num_dst_nodes()]
+        h_dst = x[: mfgs[0].num_dst_nodes()]
         h = self.conv1(mfgs[0], (x, h_dst))
         h = F.relu(h)
-        h_dst = h[:mfgs[1].num_dst_nodes()]
+        h_dst = h[: mfgs[1].num_dst_nodes()]
         h = self.conv2(mfgs[1], (h, h_dst))
         return h

+
 model = Model(num_features, 128).to(device)
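The h_dst lines in this hunk change only in whitespace. Per PEP 8, Black treats the slice colon like a low-priority binary operator: a complex bound (a call or arithmetic expression) gets a space next to the colon, while a bare name or literal does not. A sketch with hypothetical variables:

    h_dst = x[:n]                           # simple bound: no space
    h_dst = x[: mfgs[0].num_dst_nodes()]    # complex bound: space after ':'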
@@ -207,15 +220,16 @@ model = Model(num_features, 128).to(device)
 import dgl.function as fn

+
 class DotPredictor(nn.Module):
     def forward(self, g, h):
         with g.local_scope():
-            g.ndata['h'] = h
+            g.ndata["h"] = h
             # Compute a new edge feature named 'score' by a dot-product between the
             # source node feature 'h' and destination node feature 'h'.
-            g.apply_edges(fn.u_dot_v('h', 'h', 'score'))
+            g.apply_edges(fn.u_dot_v("h", "h", "score"))
             # u_dot_v returns a 1-element vector for each edge so you need to squeeze it.
-            return g.edata['score'][:, 0]
+            return g.edata["score"][:, 0]


 ######################################################################
@@ -244,29 +258,35 @@ class DotPredictor(nn.Module):
 # guide <guide-minibatch-inference>`.
 #
 def inference(model, graph, node_features):
     with torch.no_grad():
         nodes = torch.arange(graph.number_of_nodes())

         sampler = dgl.dataloading.NeighborSampler([4, 4])
         train_dataloader = dgl.dataloading.DataLoader(
-            graph, torch.arange(graph.number_of_nodes()), sampler,
+            graph,
+            torch.arange(graph.number_of_nodes()),
+            sampler,
             batch_size=1024,
             shuffle=False,
             drop_last=False,
             num_workers=4,
-            device=device)
+            device=device,
+        )

         result = []
         for input_nodes, output_nodes, mfgs in train_dataloader:
             # feature copy from CPU to GPU takes place here
-            inputs = mfgs[0].srcdata['feat']
+            inputs = mfgs[0].srcdata["feat"]
             result.append(model(mfgs, inputs))
         return torch.cat(result)

+
 import sklearn.metrics

+
 def evaluate(emb, label, train_nids, valid_nids, test_nids):
     classifier = nn.Linear(emb.shape[1], num_classes).to(device)
     opt = torch.optim.LBFGS(classifier.parameters())
@@ -282,13 +302,13 @@ def evaluate(emb, label, train_nids, valid_nids, test_nids):
         loss.backward()
         return loss

-    prev_loss = float('inf')
+    prev_loss = float("inf")
     for i in range(1000):
         opt.step(closure)
         with torch.no_grad():
             loss = compute_loss().item()
             if np.abs(loss - prev_loss) < 1e-4:
-                print('Converges at iteration', i)
+                print("Converges at iteration", i)
                 break
             else:
                 prev_loss = loss
@@ -296,8 +316,12 @@ def evaluate(emb, label, train_nids, valid_nids, test_nids):
     with torch.no_grad():
         pred = classifier(emb.to(device)).cpu()
         label = label
-        valid_acc = sklearn.metrics.accuracy_score(label[valid_nids].numpy(), pred[valid_nids].numpy().argmax(1))
-        test_acc = sklearn.metrics.accuracy_score(label[test_nids].numpy(), pred[test_nids].numpy().argmax(1))
+        valid_acc = sklearn.metrics.accuracy_score(
+            label[valid_nids].numpy(), pred[valid_nids].numpy().argmax(1)
+        )
+        test_acc = sklearn.metrics.accuracy_score(
+            label[test_nids].numpy(), pred[test_nids].numpy().argmax(1)
+        )
     return valid_acc, test_acc
@@ -323,32 +347,40 @@ import tqdm
 import sklearn.metrics

 best_accuracy = 0
-best_model_path = 'model.pt'
+best_model_path = "model.pt"
 for epoch in range(1):
     with tqdm.tqdm(train_dataloader) as tq:
         for step, (input_nodes, pos_graph, neg_graph, mfgs) in enumerate(tq):
             # feature copy from CPU to GPU takes place here
-            inputs = mfgs[0].srcdata['feat']
+            inputs = mfgs[0].srcdata["feat"]

             outputs = model(mfgs, inputs)
             pos_score = predictor(pos_graph, outputs)
             neg_score = predictor(neg_graph, outputs)

             score = torch.cat([pos_score, neg_score])
-            label = torch.cat([torch.ones_like(pos_score), torch.zeros_like(neg_score)])
+            label = torch.cat(
+                [torch.ones_like(pos_score), torch.zeros_like(neg_score)]
+            )
             loss = F.binary_cross_entropy_with_logits(score, label)

             opt.zero_grad()
             loss.backward()
             opt.step()

-            tq.set_postfix({'loss': '%.03f' % loss.item()}, refresh=False)
+            tq.set_postfix({"loss": "%.03f" % loss.item()}, refresh=False)

             if (step + 1) % 500 == 0:
                 model.eval()
                 emb = inference(model, graph, node_features)
-                valid_acc, test_acc = evaluate(emb, node_labels, train_nids, valid_nids, test_nids)
-                print('Epoch {} Validation Accuracy {} Test Accuracy {}'.format(epoch, valid_acc, test_acc))
+                valid_acc, test_acc = evaluate(
+                    emb, node_labels, train_nids, valid_nids, test_nids
+                )
+                print(
+                    "Epoch {} Validation Accuracy {} Test Accuracy {}".format(
+                        epoch, valid_acc, test_acc
+                    )
+                )
                 if best_accuracy < valid_acc:
                     best_accuracy = valid_acc
                     torch.save(model.state_dict(), best_model_path)
@@ -383,7 +415,8 @@ for epoch in range(1):
 n_test_pos = 1000
 test_pos_src, test_pos_dst = (
     torch.randint(0, graph.num_nodes(), (n_test_pos,)),
-    torch.randint(0, graph.num_nodes(), (n_test_pos,)))
+    torch.randint(0, graph.num_nodes(), (n_test_pos,)),
+)
 # Negative pairs. Likewise, you will need to replace them with your
 # own ground truth.
 test_neg_src = test_pos_src
@@ -410,10 +443,14 @@ h_neg_dst = node_reprs[test_neg_dst]
 score_pos = (h_pos_src * h_pos_dst).sum(1)
 score_neg = (h_neg_src * h_neg_dst).sum(1)

 test_preds = torch.cat([score_pos, score_neg]).cpu().numpy()
-test_labels = torch.cat([torch.ones_like(score_pos), torch.zeros_like(score_neg)]).cpu().numpy()
+test_labels = (
+    torch.cat([torch.ones_like(score_pos), torch.zeros_like(score_neg)])
+    .cpu()
+    .numpy()
+)

 auc = sklearn.metrics.roc_auc_score(test_labels, test_preds)
-print('Link Prediction AUC:', auc)
+print("Link Prediction AUC:", auc)

 ######################################################################