Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
d4b5ddcd
Unverified
Commit
d4b5ddcd
authored
Jan 03, 2024
by
Mingbang Wang
Committed by
GitHub
Jan 03, 2024
Browse files
[Example] modify dgl rgcn example to make it consistent with graphbolt's (#6882)
parent
397b7599
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
90 additions
and
153 deletions
+90
-153
examples/core/rgcn/README.md
examples/core/rgcn/README.md
+4
-6
examples/core/rgcn/hetero_rgcn.py
examples/core/rgcn/hetero_rgcn.py
+86
-147
No files found.
examples/core/rgcn/README.md
View file @
d4b5ddcd
...
@@ -25,12 +25,10 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9
...
@@ -25,12 +25,10 @@ Below results are roughly collected from an AWS EC2 **g4dn.metal**, 384GB RAM, 9
### Accuracies
### Accuracies
```
```
Final performance:
Epoch: 01, Loss: 2.3625, Valid: 48.25%, Test: 47.91%, Time 86.0210
All runs:
Epoch: 02, Loss: 1.5852, Valid: 48.56%, Test: 46.98%, Time 84.2728
Highest Train: 83.22 ± 0.00
Epoch: 03, Loss: 1.1974, Valid: 45.99%, Test: 44.05%, Time 85.7916
Highest Valid: 48.25 ± 0.20
Test accuracy 44.1165
Final Train: 68.45 ± 9.81
Final Test: 47.51 ± 0.19
```
```
## Run on `ogb-lsc-mag240m` dataset
## Run on `ogb-lsc-mag240m` dataset
...
...
examples/core/rgcn/hetero_rgcn.py
View file @
d4b5ddcd
...
@@ -46,9 +46,11 @@ main
...
@@ -46,9 +46,11 @@ main
│
│
└───> EntityClassify.evaluate
└───> EntityClassify.evaluate
"""
"""
import
argparse
import
argparse
import
itertools
import
itertools
import
sys
import
sys
import
time
import
dgl
import
dgl
import
dgl.nn
as
dglnn
import
dgl.nn
as
dglnn
...
@@ -56,7 +58,7 @@ import numpy as np
...
@@ -56,7 +58,7 @@ import numpy as np
import
psutil
import
psutil
import
torch
as
th
import
torch
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
from
dgl
import
AddReverse
,
Compose
,
ToSimple
from
dgl
import
AddReverse
,
Compose
,
ToSimple
...
@@ -83,21 +85,22 @@ def prepare_data(args, device):
...
@@ -83,21 +85,22 @@ def prepare_data(args, device):
# Apply transformation to the graph.
# Apply transformation to the graph.
# - "ToSimple()" removes multi-edge between two nodes.
# - "ToSimple()" removes multi-edge between two nodes.
# - "AddReverse()" adds reverse edges to the graph.
# - "AddReverse()" adds reverse edges to the graph.
print
(
"Start to transform graph. This may take a while..."
)
transform
=
Compose
([
ToSimple
(),
AddReverse
()])
transform
=
Compose
([
ToSimple
(),
AddReverse
()])
g
=
transform
(
g
)
g
=
transform
(
g
)
else
:
else
:
dataset
=
MAG240MDataset
(
root
=
args
.
rootdir
)
dataset
=
MAG240MDataset
(
root
=
args
.
rootdir
)
(
g
,),
_
=
dgl
.
load_graphs
(
args
.
graph_path
)
(
g
,),
_
=
dgl
.
load_graphs
(
args
.
graph_path
)
g
=
g
.
formats
([
"csc"
])
g
=
g
.
formats
([
"csc"
])
labels
=
th
.
as_tensor
(
dataset
.
paper_label
).
long
()
labels
=
t
orc
h
.
as_tensor
(
dataset
.
paper_label
).
long
()
# As feature data is too large to fit in memory, we read it from disk.
# As feature data is too large to fit in memory, we read it from disk.
feats
[
"paper"
]
=
th
.
as_tensor
(
feats
[
"paper"
]
=
t
orc
h
.
as_tensor
(
np
.
load
(
args
.
paper_feature_path
,
mmap_mode
=
"r+"
)
np
.
load
(
args
.
paper_feature_path
,
mmap_mode
=
"r+"
)
)
)
feats
[
"author"
]
=
th
.
as_tensor
(
feats
[
"author"
]
=
t
orc
h
.
as_tensor
(
np
.
load
(
args
.
author_feature_path
,
mmap_mode
=
"r+"
)
np
.
load
(
args
.
author_feature_path
,
mmap_mode
=
"r+"
)
)
)
feats
[
"institution"
]
=
th
.
as_tensor
(
feats
[
"institution"
]
=
t
orc
h
.
as_tensor
(
np
.
load
(
args
.
inst_feature_path
,
mmap_mode
=
"r+"
)
np
.
load
(
args
.
inst_feature_path
,
mmap_mode
=
"r+"
)
)
)
print
(
f
"Loaded graph:
{
g
}
"
)
print
(
f
"Loaded graph:
{
g
}
"
)
...
@@ -356,57 +359,6 @@ class EntityClassify(nn.Module):
...
@@ -356,57 +359,6 @@ class EntityClassify(nn.Module):
return
h
return
h
class
Logger
(
object
):
r
"""
This class was taken directly from the PyG implementation and can be found
here: https://github.com/snap-stanford/ogb/blob/master/examples/nodeproppre
d/mag/logger.py
This was done to ensure that performance was measured in precisely the same
way
"""
def
__init__
(
self
,
runs
):
self
.
results
=
[[]
for
_
in
range
(
runs
)]
def
add_result
(
self
,
run
,
result
):
assert
len
(
result
)
==
3
assert
run
>=
0
and
run
<
len
(
self
.
results
)
self
.
results
[
run
].
append
(
result
)
def
print_statistics
(
self
,
run
=
None
):
if
run
is
not
None
:
result
=
100
*
th
.
tensor
(
self
.
results
[
run
])
argmax
=
result
[:,
1
].
argmax
().
item
()
print
(
f
"Run
{
run
+
1
:
02
d
}
:"
)
print
(
f
"Highest Train:
{
result
[:,
0
].
max
():.
2
f
}
"
)
print
(
f
"Highest Valid:
{
result
[:,
1
].
max
():.
2
f
}
"
)
print
(
f
" Final Train:
{
result
[
argmax
,
0
]:.
2
f
}
"
)
print
(
f
" Final Test:
{
result
[
argmax
,
2
]:.
2
f
}
"
)
else
:
result
=
100
*
th
.
tensor
(
self
.
results
)
best_results
=
[]
for
r
in
result
:
train1
=
r
[:,
0
].
max
().
item
()
valid
=
r
[:,
1
].
max
().
item
()
train2
=
r
[
r
[:,
1
].
argmax
(),
0
].
item
()
test
=
r
[
r
[:,
1
].
argmax
(),
2
].
item
()
best_results
.
append
((
train1
,
valid
,
train2
,
test
))
best_result
=
th
.
tensor
(
best_results
)
print
(
"All runs:"
)
r
=
best_result
[:,
0
]
print
(
f
"Highest Train:
{
r
.
mean
():.
2
f
}
±
{
r
.
std
():.
2
f
}
"
)
r
=
best_result
[:,
1
]
print
(
f
"Highest Valid:
{
r
.
mean
():.
2
f
}
±
{
r
.
std
():.
2
f
}
"
)
r
=
best_result
[:,
2
]
print
(
f
" Final Train:
{
r
.
mean
():.
2
f
}
±
{
r
.
std
():.
2
f
}
"
)
r
=
best_result
[:,
3
]
print
(
f
" Final Test:
{
r
.
mean
():.
2
f
}
±
{
r
.
std
():.
2
f
}
"
)
def
extract_node_features
(
name
,
g
,
input_nodes
,
node_embed
,
feats
,
device
):
def
extract_node_features
(
name
,
g
,
input_nodes
,
node_embed
,
feats
,
device
):
"""Extract the node features from embedding layer or raw features."""
"""Extract the node features from embedding layer or raw features."""
if
name
==
"ogbn-mag"
:
if
name
==
"ogbn-mag"
:
...
@@ -440,17 +392,16 @@ def train(
...
@@ -440,17 +392,16 @@ def train(
train_loader
,
train_loader
,
split_idx
,
split_idx
,
labels
,
labels
,
logger
,
device
,
device
,
run
,
):
):
print
(
"
s
tart training..."
)
print
(
"
S
tart training..."
)
category
=
"paper"
category
=
"paper"
# Typically, the best Validation performance is obtained after
# Typically, the best Validation performance is obtained after
# the 1st or 2nd epoch. This is why the max epoch is set to 3.
# the 1st or 2nd epoch. This is why the max epoch is set to 3.
for
epoch
in
range
(
3
):
for
epoch
in
range
(
3
):
num_train
=
split_idx
[
"train"
][
category
].
shape
[
0
]
num_train
=
split_idx
[
"train"
][
category
].
shape
[
0
]
t0
=
time
.
time
()
model
.
train
()
model
.
train
()
total_loss
=
0
total_loss
=
0
...
@@ -482,19 +433,10 @@ def train(
...
@@ -482,19 +433,10 @@ def train(
total_loss
+=
loss
.
item
()
*
batch_size
total_loss
+=
loss
.
item
()
*
batch_size
t1
=
time
.
time
()
loss
=
total_loss
/
num_train
loss
=
total_loss
/
num_train
# Evaluate the model on the train/val/test set.
# Evaluate the model on the val/test set.
train_acc
=
evaluate
(
dataset
,
g
,
feats
,
model
,
node_embed
,
labels
,
device
,
split_idx
[
"train"
],
)
valid_acc
=
evaluate
(
valid_acc
=
evaluate
(
dataset
,
dataset
,
g
,
g
,
...
@@ -517,20 +459,16 @@ def train(
...
@@ -517,20 +459,16 @@ def train(
split_idx
[
test_key
],
split_idx
[
test_key
],
save_test_submission
=
(
dataset
==
"ogb-lsc-mag240m"
),
save_test_submission
=
(
dataset
==
"ogb-lsc-mag240m"
),
)
)
logger
.
add_result
(
run
,
(
train_acc
,
valid_acc
,
test_acc
))
print
(
print
(
f
"Run:
{
run
+
1
:
02
d
}
, "
f
"Epoch:
{
epoch
+
1
:
02
d
}
, "
f
"Epoch:
{
epoch
+
1
:
02
d
}
, "
f
"Loss:
{
loss
:.
4
f
}
, "
f
"Loss:
{
loss
:.
4
f
}
, "
f
"Train:
{
100
*
train_acc
:.
2
f
}
%, "
f
"Valid:
{
100
*
valid_acc
:.
2
f
}
%, "
f
"Valid:
{
100
*
valid_acc
:.
2
f
}
%, "
f
"Test:
{
100
*
test_acc
:.
2
f
}
%"
f
"Test:
{
100
*
test_acc
:.
2
f
}
%, "
f
"Time
{
t1
-
t0
:.
4
f
}
"
)
)
return
logger
@
th
.
no_grad
()
@
t
orc
h
.
no_grad
()
def
evaluate
(
def
evaluate
(
dataset
,
dataset
,
g
,
g
,
...
@@ -580,9 +518,9 @@ def evaluate(
...
@@ -580,9 +518,9 @@ def evaluate(
y_hats
.
append
(
y_hat
.
cpu
())
y_hats
.
append
(
y_hat
.
cpu
())
y_true
.
append
(
labels
[
seeds
[
"paper"
].
cpu
()])
y_true
.
append
(
labels
[
seeds
[
"paper"
].
cpu
()])
y_pred
=
th
.
cat
(
y_hats
,
dim
=
0
)
y_pred
=
t
orc
h
.
cat
(
y_hats
,
dim
=
0
)
y_true
=
th
.
cat
(
y_true
,
dim
=
0
)
y_true
=
t
orc
h
.
cat
(
y_true
,
dim
=
0
)
y_true
=
th
.
unsqueeze
(
y_true
,
1
)
y_true
=
t
orc
h
.
unsqueeze
(
y_true
,
1
)
if
dataset
==
"ogb-lsc-mag240m"
:
if
dataset
==
"ogb-lsc-mag240m"
:
y_pred
=
y_pred
.
view
(
-
1
)
y_pred
=
y_pred
.
view
(
-
1
)
...
@@ -596,10 +534,9 @@ def evaluate(
...
@@ -596,10 +534,9 @@ def evaluate(
def
main
(
args
):
def
main
(
args
):
device
=
"cuda:0"
if
th
.
cuda
.
is_available
()
and
args
.
num_gpus
>
0
else
"cpu"
device
=
(
"cuda:0"
if
torch
.
cuda
.
is_available
()
and
args
.
num_gpus
>
0
else
"cpu"
# Initialize a logger.
)
logger
=
Logger
(
args
.
runs
)
# Prepare the data.
# Prepare the data.
g
,
labels
,
num_classes
,
split_idx
,
train_loader
,
feats
=
prepare_data
(
g
,
labels
,
num_classes
,
split_idx
,
train_loader
,
feats
=
prepare_data
(
...
@@ -625,7 +562,6 @@ def main(args):
...
@@ -625,7 +562,6 @@ def main(args):
f
"
{
sum
(
p
.
numel
()
for
p
in
model
.
parameters
())
}
"
f
"
{
sum
(
p
.
numel
()
for
p
in
model
.
parameters
())
}
"
)
)
for
run
in
range
(
args
.
runs
):
try
:
try
:
if
embed_layer
is
not
None
:
if
embed_layer
is
not
None
:
embed_layer
.
reset_parameters
()
embed_layer
.
reset_parameters
()
...
@@ -652,7 +588,7 @@ def main(args):
...
@@ -652,7 +588,7 @@ def main(args):
model
.
parameters
(),
model
.
parameters
(),
[]
if
embed_layer
is
None
else
embed_layer
.
parameters
(),
[]
if
embed_layer
is
None
else
embed_layer
.
parameters
(),
)
)
optimizer
=
th
.
optim
.
Adam
(
all_params
,
lr
=
0.01
)
optimizer
=
t
orc
h
.
optim
.
Adam
(
all_params
,
lr
=
0.01
)
# `expected_max`` is the number of physical cores on your machine.
# `expected_max`` is the number of physical cores on your machine.
# The `logical` parameter, when set to False, ensures that the count
# The `logical` parameter, when set to False, ensures that the count
...
@@ -665,7 +601,7 @@ def main(args):
...
@@ -665,7 +601,7 @@ def main(args):
f
"cores, please set any number less than
{
expected_max
}
"
,
f
"cores, please set any number less than
{
expected_max
}
"
,
file
=
sys
.
stderr
,
file
=
sys
.
stderr
,
)
)
logger
=
train
(
train
(
args
.
dataset
,
args
.
dataset
,
g
,
g
,
feats
,
feats
,
...
@@ -675,14 +611,23 @@ def main(args):
...
@@ -675,14 +611,23 @@ def main(args):
train_loader
,
train_loader
,
split_idx
,
split_idx
,
labels
,
labels
,
logger
,
device
,
device
,
run
,
)
)
logger
.
print_statistics
(
run
)
print
(
"Final performance: "
)
print
(
"Testing..."
)
logger
.
print_statistics
()
test_key
=
"test"
if
args
.
dataset
==
"ogbn-mag"
else
"test-dev"
test_acc
=
evaluate
(
args
.
dataset
,
g
,
feats
,
model
,
embed_layer
,
labels
,
device
,
split_idx
[
test_key
],
save_test_submission
=
(
args
.
dataset
==
"ogb-lsc-mag240m"
),
)
print
(
f
"Test accuracy
{
test_acc
*
100
:.
4
f
}
"
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
@@ -699,12 +644,6 @@ if __name__ == "__main__":
...
@@ -699,12 +644,6 @@ if __name__ == "__main__":
default
=
0
,
default
=
0
,
help
=
"Number of GPUs. Use 0 for CPU training."
,
help
=
"Number of GPUs. Use 0 for CPU training."
,
)
)
parser
.
add_argument
(
"--runs"
,
type
=
int
,
default
=
5
,
help
=
"Number of runs. Each run will train the model from scratch."
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--num_workers"
,
"--num_workers"
,
type
=
int
,
type
=
int
,
...
@@ -714,7 +653,7 @@ if __name__ == "__main__":
...
@@ -714,7 +653,7 @@ if __name__ == "__main__":
parser
.
add_argument
(
parser
.
add_argument
(
"--rootdir"
,
"--rootdir"
,
type
=
str
,
type
=
str
,
default
=
"./"
,
default
=
"./
dataset/
"
,
help
=
"Directory to download the OGB dataset."
,
help
=
"Directory to download the OGB dataset."
,
)
)
parser
.
add_argument
(
parser
.
add_argument
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment