OpenDAS / dgl · Commits

Commit 9836f78e (Unverified)
autoformat (#5322)
Authored Feb 19, 2023 by Hongzhi (Steve), Chen; committed by GitHub on Feb 19, 2023
Co-authored-by: Ubuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
Parent: 704bcaf6
Changes: 57 · Showing 17 changed files with 528 additions and 350 deletions (+528 -350)
benchmarks/benchmarks/model_acc/bench_sage.py  +1 -2
benchmarks/benchmarks/model_acc/bench_sage_ns.py  +45 -35
benchmarks/benchmarks/model_speed/bench_gat.py  +2 -2
benchmarks/benchmarks/model_speed/bench_gat_ns.py  +73 -49
benchmarks/benchmarks/model_speed/bench_gcn_udf.py  +2 -2
benchmarks/benchmarks/model_speed/bench_pinsage.py  +3 -3
benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py  +155 -79
benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py  +143 -87
benchmarks/benchmarks/model_speed/bench_sage.py  +2 -2
benchmarks/benchmarks/model_speed/bench_sage_ns.py  +33 -29
benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py  +4 -4
benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py  +3 -3
benchmarks/benchmarks/multigpu/bench_multigpu_sage.py  +58 -46
benchmarks/benchmarks/multigpu/rgcn_model.py  +1 -2
benchmarks/benchmarks/rgcn.py  +1 -2
benchmarks/benchmarks/utils.py  +2 -2
benchmarks/scripts/replace_branch.py  +0 -1
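The changes in this commit appear to be formatting-only: quote style, line wrapping, trailing commas, and import ordering; no logic is touched. In the per-file diffs below, the pre-format and post-format version of a changed line appear next to each other. As a quick sanity check that this kind of reformat leaves program structure intact, the parsed ASTs of the old and new version of a file can be compared; the snippet below is a minimal sketch under that assumption (the file paths are placeholders, not part of this commit, and re-ordered imports will, correctly, show up as a structural difference).

import ast

def same_ast(old_path, new_path):
    # Parse both versions; ast.dump() normalizes whitespace, quoting and
    # line wrapping, so equal dumps mean the program structure is unchanged.
    with open(old_path) as f:
        old_tree = ast.parse(f.read())
    with open(new_path) as f:
        new_tree = ast.parse(f.read())
    return ast.dump(old_tree) == ast.dump(new_tree)

# Hypothetical usage:
# print(same_ast("bench_sage.py.orig", "bench_sage.py"))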
benchmarks/benchmarks/model_acc/bench_sage.py

import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn.pytorch import SAGEConv
from .. import utils

...
benchmarks/benchmarks/model_acc/bench_sage_ns.py

import time
import dgl
import dgl.nn.pytorch as dglnn
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn
import time
from .. import utils


class SAGE(nn.Module):
    def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean'))
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
        for i in range(1, n_layers - 1):
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean'))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean'))
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

...

@@ -55,8 +52,10 @@ class SAGE(nn.Module):
        # on each layer are of course splitted in batches.
        # TODO: can we standardize this?
        for l, layer in enumerate(self.layers):
            y = th.zeros(g.number_of_nodes(), self.n_hidden if l != len(self.layers) - 1 else self.n_classes)
            y = th.zeros(
                g.number_of_nodes(),
                self.n_hidden if l != len(self.layers) - 1 else self.n_classes,
            )
            sampler = dgl.dataloading.MultiLayerFullNeighborSampler(1)
            dataloader = dgl.dataloading.DataLoader(

...

@@ -66,7 +65,8 @@ class SAGE(nn.Module):
                batch_size=batch_size,
                shuffle=True,
                drop_last=False,
                num_workers=4)
                num_workers=4,
            )
            for input_nodes, output_nodes, blocks in dataloader:
                block = blocks[0]

...

@@ -113,20 +113,20 @@ def load_subtensor(g, seeds, input_nodes, device):
    """
    Copys features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata['features'][input_nodes].to(device)
    batch_labels = g.ndata['labels'][seeds].to(device)
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    batch_labels = g.ndata["labels"][seeds].to(device)
    return batch_inputs, batch_labels


@utils.benchmark('acc', 600)
@utils.parametrize('data', ['ogbn-products', "reddit"])
@utils.benchmark("acc", 600)
@utils.parametrize("data", ["ogbn-products", "reddit"])
def track_acc(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()
    g = data[0]

    g.ndata['features'] = g.ndata['feat']
    g.ndata['labels'] = g.ndata['label']
    in_feats = g.ndata['features'].shape[1]
    g.ndata["features"] = g.ndata["feat"]
    g.ndata["labels"] = g.ndata["label"]
    in_feats = g.ndata["features"].shape[1]
    n_classes = data.num_classes

    # Create csr/coo/csc formats before launching training processes with multi-gpu.

...

@@ -136,17 +136,18 @@ def track_acc(data):
    num_epochs = 20
    num_hidden = 16
    num_layers = 2
    fan_out = '5,10'
    fan_out = "5,10"
    batch_size = 1024
    lr = 0.003
    dropout = 0.5
    num_workers = 4

    train_nid = th.nonzero(g.ndata['train_mask'], as_tuple=True)[0]
    train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0]

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in fan_out.split(',')])
        [int(fanout) for fanout in fan_out.split(",")]
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_nid,

...

@@ -154,7 +155,8 @@ def track_acc(data):
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers)
        num_workers=num_workers,
    )

    # Define model and optimizer
    model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout)

...

@@ -166,10 +168,10 @@ def track_acc(data):
    # dry run one epoch
    for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
        # Load the input features as well as output labels
        #batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
        # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
        blocks = [block.int().to(device) for block in blocks]
        batch_inputs = blocks[0].srcdata['features']
        batch_labels = blocks[-1].dstdata['labels']
        batch_inputs = blocks[0].srcdata["features"]
        batch_labels = blocks[-1].dstdata["labels"]

        # Compute loss and prediction
        batch_pred = model(blocks, batch_inputs)

...

@@ -184,10 +186,10 @@ def track_acc(data):
    # blocks.
    for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
        # Load the input features as well as output labels
        #batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
        # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
        blocks = [block.int().to(device) for block in blocks]
        batch_inputs = blocks[0].srcdata['features']
        batch_labels = blocks[-1].dstdata['labels']
        batch_inputs = blocks[0].srcdata["features"]
        batch_labels = blocks[-1].dstdata["labels"]

        # Compute loss and prediction
        batch_pred = model(blocks, batch_inputs)

...

@@ -198,8 +200,16 @@ def track_acc(data):
    test_g = g
    test_nid = th.nonzero(~(test_g.ndata['train_mask'] | test_g.ndata['val_mask']), as_tuple=True)[0]
    test_nid = th.nonzero(
        ~(test_g.ndata["train_mask"] | test_g.ndata["val_mask"]), as_tuple=True
    )[0]
    test_acc = evaluate(model, test_g, test_g.ndata['features'], test_g.ndata['labels'], test_nid, batch_size, device)
    test_acc = evaluate(
        model,
        test_g,
        test_g.ndata["features"],
        test_g.ndata["labels"],
        test_nid,
        batch_size,
        device,
    )
    return test_acc.item()
benchmarks/benchmarks/model_speed/bench_gat.py

import time
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn.pytorch import GATConv
from .. import utils

...
benchmarks/benchmarks/model_speed/bench_gat_ns.py

import time
import traceback
import dgl
import dgl.nn.pytorch as dglnn
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn
import time
import traceback
from .. import utils


class GAT(nn.Module):
    def __init__(
        self,
        in_feats,
        num_heads,
        n_hidden,
        n_classes,
        n_layers,
        activation,
        dropout=0.):
        dropout=0.0,
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.num_heads = num_heads
        self.layers.append(
            dglnn.GATConv(
                in_feats,
                n_hidden,
                num_heads=num_heads,
                feat_drop=dropout,
                attn_drop=dropout,
                activation=activation,
                negative_slope=0.2))
                negative_slope=0.2,
            )
        )
        for i in range(1, n_layers - 1):
            self.layers.append(
                dglnn.GATConv(
                    n_hidden * num_heads,
                    n_hidden,
                    num_heads=num_heads,
                    feat_drop=dropout,
                    attn_drop=dropout,
                    activation=activation,
                    negative_slope=0.2))
                    negative_slope=0.2,
                )
            )
        self.layers.append(
            dglnn.GATConv(
                n_hidden * num_heads,
                n_classes,
                num_heads=num_heads,
                feat_drop=dropout,
                attn_drop=dropout,
                activation=None,
                negative_slope=0.2))
                negative_slope=0.2,
            )
        )

    def forward(self, blocks, x):
        h = x

...

@@ -58,24 +74,26 @@ class GAT(nn.Module):
        h = h.mean(1)
        return h.log_softmax(dim=-1)


def load_subtensor(g, seeds, input_nodes, device):
    """
    Copys features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata['features'][input_nodes].to(device)
    batch_labels = g.ndata['labels'][seeds].to(device)
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    batch_labels = g.ndata["labels"][seeds].to(device)
    return batch_inputs, batch_labels


@utils.benchmark('time', 600)
@utils.parametrize('data', ['reddit', 'ogbn-products'])
@utils.benchmark("time", 600)
@utils.parametrize("data", ["reddit", "ogbn-products"])
def track_time(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()
    g = data[0]

    g.ndata['features'] = g.ndata['feat']
    g.ndata['labels'] = g.ndata['label']
    g.ndata["features"] = g.ndata["feat"]
    g.ndata["labels"] = g.ndata["label"]
    g = g.remove_self_loop().add_self_loop()
    in_feats = g.ndata['features'].shape[1]
    in_feats = g.ndata["features"].shape[1]
    n_classes = data.num_classes

    # Create csr/coo/csc formats before launching training processes with multi-gpu.

...

@@ -85,7 +103,7 @@ def track_time(data):
    num_hidden = 16
    num_heads = 8
    num_layers = 2
    fan_out = '10,25'
    fan_out = "10,25"
    batch_size = 1024
    lr = 0.003
    dropout = 0.5

...

@@ -93,11 +111,12 @@ def track_time(data):
    iter_start = 3
    iter_count = 10

    train_nid = th.nonzero(g.ndata['train_mask'], as_tuple=True)[0]
    train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0]

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in fan_out.split(',')])
        [int(fanout) for fanout in fan_out.split(",")]
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_nid,

...

@@ -105,10 +124,13 @@ def track_time(data):
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers)
        num_workers=num_workers,
    )

    # Define model and optimizer
    model = GAT(in_feats, num_heads, num_hidden, n_classes, num_layers, F.relu, dropout)
    model = model.to(device)
    loss_fcn = nn.CrossEntropyLoss()
    loss_fcn = loss_fcn.to(device)

...

@@ -125,8 +147,8 @@ def track_time(data):
    for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
        # Load the input features as well as output labels
        blocks = [block.int().to(device) for block in blocks]
        batch_inputs = blocks[0].srcdata['features']
        batch_labels = blocks[-1].dstdata['labels']
        batch_inputs = blocks[0].srcdata["features"]
        batch_labels = blocks[-1].dstdata["labels"]

        # Compute loss and prediction
        batch_pred = model(blocks, batch_inputs)

...

@@ -138,7 +160,9 @@ def track_time(data):
        # start timer at before iter_start
        if step == iter_start - 1:
            t0 = time.time()
        elif step == iter_count + iter_start - 1:  # time iter_count iterations
        elif (
            step == iter_count + iter_start - 1
        ):  # time iter_count iterations
            break

    t1 = time.time()

...
benchmarks/benchmarks/model_speed/bench_gcn_udf.py

import time
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from .. import utils

...
benchmarks/benchmarks/model_speed/bench_pinsage.py

...

@@ -2,15 +2,15 @@ import argparse
import pickle
import time
import dgl
import dgl.function as fn
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, IterableDataset
import dgl
import dgl.function as fn
from .. import utils

...
benchmarks/benchmarks/model_speed/bench_rgcn_hetero_ns.py

import dgl
import itertools
import time
import traceback
import dgl
import dgl.nn.pytorch as dglnn
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn
import time
import traceback
from .. import utils


class RelGraphConvLayer(nn.Module):
    r"""Relational graph convolution layer.

...

@@ -36,7 +38,9 @@ class RelGraphConvLayer(nn.Module):
    dropout : float, optional
        Dropout rate. Default: 0.0
    """

    def __init__(
        self,
        in_feat,
        out_feat,
        rel_names,

...

@@ -46,7 +50,8 @@ class RelGraphConvLayer(nn.Module):
        bias=True,
        activation=None,
        self_loop=False,
        dropout=0.0):
        dropout=0.0,
    ):
        super(RelGraphConvLayer, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat

...

@@ -56,19 +61,29 @@ class RelGraphConvLayer(nn.Module):
        self.activation = activation
        self.self_loop = self_loop

        self.conv = dglnn.HeteroGraphConv({
            rel: dglnn.GraphConv(in_feat, out_feat, norm='right', weight=False, bias=False)
        self.conv = dglnn.HeteroGraphConv(
            {
                rel: dglnn.GraphConv(
                    in_feat, out_feat, norm="right", weight=False, bias=False
                )
                for rel in rel_names})
            }
        )

        self.use_weight = weight
        self.use_basis = num_bases < len(self.rel_names) and weight
        if self.use_weight:
            if self.use_basis:
                self.basis = dglnn.WeightBasis((in_feat, out_feat), num_bases, len(self.rel_names))
                self.basis = dglnn.WeightBasis(
                    (in_feat, out_feat), num_bases, len(self.rel_names)
                )
            else:
                self.weight = nn.Parameter(th.Tensor(len(self.rel_names), in_feat, out_feat))
                nn.init.xavier_uniform_(self.weight, gain=nn.init.calculate_gain('relu'))
                self.weight = nn.Parameter(
                    th.Tensor(len(self.rel_names), in_feat, out_feat)
                )
                nn.init.xavier_uniform_(
                    self.weight, gain=nn.init.calculate_gain("relu")
                )

        # bias
        if bias:

...

@@ -78,8 +93,9 @@ class RelGraphConvLayer(nn.Module):
        # weight for self loop
        if self.self_loop:
            self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat))
            nn.init.xavier_uniform_(self.loop_weight, gain=nn.init.calculate_gain('relu'))
            nn.init.xavier_uniform_(
                self.loop_weight, gain=nn.init.calculate_gain("relu")
            )

        self.dropout = nn.Dropout(dropout)

...

@@ -101,14 +117,18 @@ class RelGraphConvLayer(nn.Module):
        g = g.local_var()
        if self.use_weight:
            weight = self.basis() if self.use_basis else self.weight
            wdict = {self.rel_names[i]: {'weight': w.squeeze(0)}
                     for i, w in enumerate(th.split(weight, 1, dim=0))}
            wdict = {
                self.rel_names[i]: {"weight": w.squeeze(0)}
                for i, w in enumerate(th.split(weight, 1, dim=0))
            }
        else:
            wdict = {}

        if g.is_block:
            inputs_src = inputs
            inputs_dst = {k: v[:g.number_of_dst_nodes(k)] for k, v in inputs.items()}
            inputs_dst = {
                k: v[: g.number_of_dst_nodes(k)] for k, v in inputs.items()
            }
        else:
            inputs_src = inputs_dst = inputs

...

@@ -122,20 +142,24 @@ class RelGraphConvLayer(nn.Module):
            if self.activation:
                h = self.activation(h)
            return self.dropout(h)

        return {ntype: _apply(ntype, h) for ntype, h in hs.items()}


class RelGraphEmbed(nn.Module):
    r"""Embedding layer for featureless heterograph."""

    def __init__(
        self,
        g,
        device,
        embed_size,
        num_nodes,
        node_feats,
        embed_name='embed',
        embed_name="embed",
        activation=None,
        dropout=0.0):
        dropout=0.0,
    ):
        super(RelGraphEmbed, self).__init__()
        self.g = g
        self.device = device

...

@@ -150,7 +174,9 @@ class RelGraphEmbed(nn.Module):
        self.node_embeds = nn.ModuleDict()
        for ntype in g.ntypes:
            if node_feats[ntype] is None:
                sparse_emb = th.nn.Embedding(
                    num_nodes[ntype], embed_size, sparse=True
                )
                nn.init.uniform_(sparse_emb.weight, -1.0, 1.0)
                self.node_embeds[ntype] = sparse_emb
            else:

...

@@ -177,19 +203,28 @@ class RelGraphEmbed(nn.Module):
        embeds = {}
        for ntype in block.ntypes:
            if self.node_feats[ntype] is None:
                embeds[ntype] = self.node_embeds[ntype](block.nodes(ntype)).to(
                    self.device
                )
            else:
                embeds[ntype] = self.node_feats[ntype][block.nodes(ntype)].to(self.device) @ self.embeds[ntype]
                embeds[ntype] = (
                    self.node_feats[ntype][block.nodes(ntype)].to(self.device)
                    @ self.embeds[ntype]
                )
        return embeds


class EntityClassify(nn.Module):
    def __init__(
        self,
        g,
        h_dim, out_dim,
        h_dim,
        out_dim,
        num_bases,
        num_hidden_layers=1,
        dropout=0,
        use_self_loop=False):
        use_self_loop=False,
    ):
        super(EntityClassify, self).__init__()
        self.g = g
        self.h_dim = h_dim

...

@@ -206,34 +241,56 @@ class EntityClassify(nn.Module):
        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(RelGraphConvLayer(self.h_dim, self.h_dim, self.rel_names, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, weight=False))
        self.layers.append(
            RelGraphConvLayer(
                self.h_dim,
                self.h_dim,
                self.rel_names,
                self.num_bases,
                activation=F.relu,
                self_loop=self.use_self_loop,
                dropout=self.dropout,
                weight=False,
            )
        )
        # h2h
        for i in range(self.num_hidden_layers):
            self.layers.append(RelGraphConvLayer(self.h_dim, self.h_dim, self.rel_names, self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout))
            self.layers.append(
                RelGraphConvLayer(
                    self.h_dim,
                    self.h_dim,
                    self.rel_names,
                    self.num_bases,
                    activation=F.relu,
                    self_loop=self.use_self_loop,
                    dropout=self.dropout,
                )
            )
        # h2o
        self.layers.append(RelGraphConvLayer(self.h_dim, self.out_dim, self.rel_names, self.num_bases, activation=None, self_loop=self.use_self_loop))
        self.layers.append(
            RelGraphConvLayer(
                self.h_dim,
                self.out_dim,
                self.rel_names,
                self.num_bases,
                activation=None,
                self_loop=self.use_self_loop,
            )
        )

    def forward(self, h, blocks):
        for layer, block in zip(self.layers, blocks):
            h = layer(block, h)
        return h


@utils.benchmark('time', 600)
@utils.parametrize('data', ['ogbn-mag'])
@utils.benchmark("time", 600)
@utils.parametrize("data", ["ogbn-mag"])
def track_time(data):
    dataset = utils.process_data(data)
    device = utils.get_bench_device()

    if data == 'ogbn-mag':
    if data == "ogbn-mag":
        n_bases = 2
        l2norm = 0
    else:

...

@@ -252,35 +309,50 @@ def track_time(data):
    hg = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = hg.nodes[category].data.pop('train_mask')
    train_mask = hg.nodes[category].data.pop("train_mask")
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    labels = hg.nodes[category].data.pop('labels')
    labels = hg.nodes[category].data.pop("labels")

    node_feats = {}
    num_nodes = {}
    for ntype in hg.ntypes:
        node_feats[ntype] = hg.nodes[ntype].data['feat'] if 'feat' in hg.nodes[ntype].data else None
        node_feats[ntype] = (
            hg.nodes[ntype].data["feat"]
            if "feat" in hg.nodes[ntype].data
            else None
        )
        num_nodes[ntype] = hg.number_of_nodes(ntype)

    embed_layer = RelGraphEmbed(hg, device, n_hidden, num_nodes, node_feats)
    model = EntityClassify(hg,
    model = EntityClassify(
        hg,
        n_hidden,
        num_classes,
        num_bases=n_bases,
        num_hidden_layers=n_layers - 2,
        dropout=dropout,
        use_self_loop=use_self_loop)
        use_self_loop=use_self_loop,
    )

    embed_layer = embed_layer.to(device)
    model = model.to(device)

    all_params = itertools.chain(model.parameters(), embed_layer.embeds.parameters())
    all_params = itertools.chain(
        model.parameters(), embed_layer.embeds.parameters()
    )
    optimizer = th.optim.Adam(all_params, lr=lr, weight_decay=l2norm)
    sparse_optimizer = th.optim.SparseAdam(list(embed_layer.node_embeds.parameters()), lr=lr)
    sparse_optimizer = th.optim.SparseAdam(
        list(embed_layer.node_embeds.parameters()), lr=lr
    )

    sampler = dgl.dataloading.MultiLayerNeighborSampler([fanout] * n_layers)
    loader = dgl.dataloading.DataLoader(
        hg, {category: train_idx}, sampler, batch_size=batch_size, shuffle=True, num_workers=4)
        hg,
        {category: train_idx},
        sampler,
        batch_size=batch_size,
        shuffle=True,
        num_workers=4,
    )

    print("start training...")
    model.train()

...

@@ -292,11 +364,13 @@ def track_time(data):
    with loader.enable_cpu_affinity():
        for step, (input_nodes, seeds, blocks) in enumerate(loader):
            blocks = [blk.to(device) for blk in blocks]
            seeds = seeds[category]  # we only predict the nodes with type "category"
            seeds = seeds[
                category
            ]  # we only predict the nodes with type "category"
            batch_tic = time.time()
            emb = embed_layer(blocks[0])
            lbl = labels[seeds].to(device)
            emb = {k: e.to(device) for k, e in emb.items()}
            logits = model(emb, blocks)[category]
            loss = F.cross_entropy(logits, lbl)
            loss.backward()

...

@@ -306,7 +380,9 @@ def track_time(data):
            # start timer at before iter_start
            if step == iter_start - 1:
                t0 = time.time()
            elif step == iter_count + iter_start - 1:  # time iter_count iterations
            elif (
                step == iter_count + iter_start - 1
            ):  # time iter_count iterations
                break

    t1 = time.time()

...
benchmarks/benchmarks/model_speed/bench_rgcn_homogeneous_ns.py

import dgl
import itertools
import time
import dgl
import dgl.nn.pytorch as dglnn
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn
from dgl.nn import RelGraphConv
import time
from torch.utils.data import DataLoader
from .. import utils


class EntityClassify(nn.Module):
    """
    Entity classification class for RGCN
    """Entity classification class for RGCN
    Parameters
    ----------
    device : int

...

@@ -35,7 +37,9 @@ class EntityClassify(nn.Module):
    use_self_loop : bool
        Use self loop if True, default False.
    """

    def __init__(
        self,
        device,
        num_nodes,
        h_dim,

...

@@ -45,7 +49,8 @@ class EntityClassify(nn.Module):
        num_hidden_layers=1,
        dropout=0,
        use_self_loop=False,
        layer_norm=False):
        layer_norm=False,
    ):
        super(EntityClassify, self).__init__()
        self.device = device
        self.num_nodes = num_nodes

...

@@ -60,22 +65,47 @@ class EntityClassify(nn.Module):
        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(RelGraphConv(self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, layer_norm=layer_norm))
        self.layers.append(
            RelGraphConv(
                self.h_dim,
                self.h_dim,
                self.num_rels,
                "basis",
                self.num_bases,
                activation=F.relu,
                self_loop=self.use_self_loop,
                dropout=self.dropout,
                layer_norm=layer_norm,
            )
        )
        # h2h
        for idx in range(self.num_hidden_layers):
            self.layers.append(RelGraphConv(self.h_dim, self.h_dim, self.num_rels, "basis", self.num_bases, activation=F.relu, self_loop=self.use_self_loop, dropout=self.dropout, layer_norm=layer_norm))
            self.layers.append(
                RelGraphConv(
                    self.h_dim,
                    self.h_dim,
                    self.num_rels,
                    "basis",
                    self.num_bases,
                    activation=F.relu,
                    self_loop=self.use_self_loop,
                    dropout=self.dropout,
                    layer_norm=layer_norm,
                )
            )
        # h2o
        self.layers.append(RelGraphConv(self.h_dim, self.out_dim, self.num_rels, "basis", self.num_bases, activation=None, self_loop=self.use_self_loop, layer_norm=layer_norm))
        self.layers.append(
            RelGraphConv(
                self.h_dim,
                self.out_dim,
                self.num_rels,
                "basis",
                self.num_bases,
                activation=None,
                self_loop=self.use_self_loop,
                layer_norm=layer_norm,
            )
        )

    def forward(self, blocks, feats, norm=None):
        if blocks is None:

...

@@ -84,9 +114,10 @@ class EntityClassify(nn.Module):
        h = feats
        for layer, block in zip(self.layers, blocks):
            block = block.to(self.device)
            h = layer(block, h, block.edata['etype'], block.edata['norm'])
            h = layer(block, h, block.edata["etype"], block.edata["norm"])
        return h


class RelGraphEmbedLayer(nn.Module):
    r"""Embedding layer for featureless heterograph.
    Parameters

...

@@ -107,7 +138,9 @@ class RelGraphEmbedLayer(nn.Module):
    embed_name : str, optional
        Embed name
    """

    def __init__(
        self,
        device,
        num_nodes,
        node_tids,

...

@@ -115,7 +148,8 @@ class RelGraphEmbedLayer(nn.Module):
        input_size,
        embed_size,
        sparse_emb=False,
        embed_name='embed'):
        embed_name="embed",
    ):
        super(RelGraphEmbedLayer, self).__init__()
        self.device = device
        self.embed_size = embed_size

...

@@ -135,7 +169,9 @@ class RelGraphEmbedLayer(nn.Module):
                nn.init.xavier_uniform_(embed)
                self.embeds[str(ntype)] = embed

        self.node_embeds = th.nn.Embedding(node_tids.shape[0], self.embed_size, sparse=self.sparse_emb)
        self.node_embeds = th.nn.Embedding(
            node_tids.shape[0], self.embed_size, sparse=self.sparse_emb
        )
        nn.init.uniform_(self.node_embeds.weight, -1.0, 1.0)

    def forward(self, node_ids, node_tids, type_ids, features):

...

@@ -157,35 +193,40 @@ class RelGraphEmbedLayer(nn.Module):
            embeddings as the input of the next layer
        """
        tsd_ids = node_ids.to(self.node_embeds.weight.device)
        embeds = th.empty(node_ids.shape[0], self.embed_size, device=self.device)
        embeds = th.empty(
            node_ids.shape[0], self.embed_size, device=self.device
        )
        for ntype in range(self.num_of_ntype):
            if features[ntype] is not None:
                loc = node_tids == ntype
                embeds[loc] = features[ntype][type_ids[loc]].to(self.device) @ self.embeds[str(ntype)].to(self.device)
            else:
                loc = node_tids == ntype
                embeds[loc] = self.node_embeds(tsd_ids[loc]).to(self.device)

        return embeds


@utils.benchmark('time', 600)
@utils.parametrize('data', ['am', 'ogbn-mag'])
@utils.benchmark("time", 600)
@utils.parametrize("data", ["am", "ogbn-mag"])
def track_time(data):
    dataset = utils.process_data(data)
    device = utils.get_bench_device()

    if data == 'am':
    if data == "am":
        batch_size = 64
        n_bases = 40
        l2norm = 5e-4
    elif data == 'ogbn-mag':
    elif data == "ogbn-mag":
        batch_size = 1024
        n_bases = 2
        l2norm = 0
    else:
        raise ValueError()
    fanouts = [25, 15]
    n_layers = 2
    n_hidden = 64
    dropout = 0.5

...

@@ -198,18 +239,18 @@ def track_time(data):
    hg = dataset[0]
    category = dataset.predict_category
    num_classes = dataset.num_classes
    train_mask = hg.nodes[category].data.pop('train_mask')
    train_mask = hg.nodes[category].data.pop("train_mask")
    train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
    labels = hg.nodes[category].data.pop('labels').to(device)
    labels = hg.nodes[category].data.pop("labels").to(device)
    num_of_ntype = len(hg.ntypes)
    num_rels = len(hg.canonical_etypes)

    node_feats = []
    for ntype in hg.ntypes:
        if len(hg.nodes[ntype].data) == 0 or 'feat' not in hg.nodes[ntype].data:
        if len(hg.nodes[ntype].data) == 0 or "feat" not in hg.nodes[ntype].data:
            node_feats.append(None)
        else:
            feat = hg.nodes[ntype].data.pop('feat')
            feat = hg.nodes[ntype].data.pop("feat")
            node_feats.append(feat.share_memory_())

    # get target category id

...

@@ -218,26 +259,28 @@ def track_time(data):
        if ntype == category:
            category_id = i

    g = dgl.to_homogeneous(hg)
    u, v, eid = g.all_edges(form='all')
    u, v, eid = g.all_edges(form="all")
    # global norm
    _, inverse_index, count = th.unique(v, return_inverse=True, return_counts=True)
    _, inverse_index, count = th.unique(
        v, return_inverse=True, return_counts=True
    )
    degrees = count[inverse_index]
    norm = th.ones(eid.shape[0]) / degrees
    norm = norm.unsqueeze(1)
    g.edata['norm'] = norm
    g.edata['etype'] = g.edata[dgl.ETYPE]
    g.ndata['type_id'] = g.ndata[dgl.NID]
    g.ndata['ntype'] = g.ndata[dgl.NTYPE]
    g.edata["norm"] = norm
    g.edata["etype"] = g.edata[dgl.ETYPE]
    g.ndata["type_id"] = g.ndata[dgl.NID]
    g.ndata["ntype"] = g.ndata[dgl.NTYPE]

    node_ids = th.arange(g.number_of_nodes())
    # find out the target node ids
    node_tids = g.ndata[dgl.NTYPE]
    loc = (node_tids == category_id)
    loc = node_tids == category_id
    target_nids = node_ids[loc]
    train_nids = target_nids[train_idx]

    g = g.formats('csc')
    g = g.formats("csc")
    sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
    loader = dgl.dataloading.DataLoader(
        g,

...

@@ -246,22 +289,26 @@ def track_time(data):
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers)
        num_workers=num_workers,
    )

    # node features
    # None for one-hot feature, if not none, it should be the feature tensor.
    #
    embed_layer = RelGraphEmbedLayer(device,
    embed_layer = RelGraphEmbedLayer(
        device,
        g.number_of_nodes(),
        node_tids,
        num_of_ntype,
        node_feats,
        n_hidden,
        sparse_emb=True)
        sparse_emb=True,
    )

    # create model
    # all model params are in device.
    model = EntityClassify(device,
    model = EntityClassify(
        device,
        g.number_of_nodes(),
        n_hidden,
        num_classes,

...

@@ -270,14 +317,19 @@ def track_time(data):
        num_hidden_layers=n_layers - 2,
        dropout=dropout,
        use_self_loop=use_self_loop,
        layer_norm=False)
        layer_norm=False,
    )

    embed_layer = embed_layer.to(device)
    model = model.to(device)

    all_params = itertools.chain(model.parameters(), embed_layer.embeds.parameters())
    all_params = itertools.chain(
        model.parameters(), embed_layer.embeds.parameters()
    )
    optimizer = th.optim.Adam(all_params, lr=lr, weight_decay=l2norm)
    emb_optimizer = th.optim.SparseAdam(list(embed_layer.node_embeds.parameters()), lr=lr)
    emb_optimizer = th.optim.SparseAdam(
        list(embed_layer.node_embeds.parameters()), lr=lr
    )

    print("start training...")
    model.train()

...

@@ -287,12 +339,14 @@ def track_time(data):
    with loader.enable_cpu_affinity():
        for step, sample_data in enumerate(loader):
            input_nodes, output_nodes, blocks = sample_data
            feats = embed_layer(input_nodes, blocks[0].srcdata['ntype'], blocks[0].srcdata['type_id'], node_feats)
            feats = embed_layer(
                input_nodes,
                blocks[0].srcdata["ntype"],
                blocks[0].srcdata["type_id"],
                node_feats,
            )
            logits = model(blocks, feats)
            seed_idx = blocks[-1].dstdata['type_id']
            seed_idx = blocks[-1].dstdata["type_id"]
            loss = F.cross_entropy(logits, labels[seed_idx])
            optimizer.zero_grad()
            emb_optimizer.zero_grad()

...

@@ -304,7 +358,9 @@ def track_time(data):
            # start timer at before iter_start
            if step == iter_start - 1:
                t0 = time.time()
            elif step == iter_count + iter_start - 1:  # time iter_count iterations
            elif (
                step == iter_count + iter_start - 1
            ):  # time iter_count iterations
                break

    t1 = time.time()

...
benchmarks/benchmarks/model_speed/bench_sage.py

import time
import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn.pytorch import SAGEConv
from .. import utils

...
benchmarks/benchmarks/model_speed/bench_sage_ns.py

import time
import dgl
import dgl.nn.pytorch as dglnn
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
from torch.utils.data import DataLoader
import dgl.nn.pytorch as dglnn
import time
from .. import utils


class SAGE(nn.Module):
    def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean'))
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
        for i in range(1, n_layers - 1):
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean'))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean'))
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

...

@@ -39,23 +37,25 @@ class SAGE(nn.Module):
            h = self.dropout(h)
        return h


def load_subtensor(g, seeds, input_nodes, device):
    """
    Copys features and labels of a set of nodes onto GPU.
    """
    batch_inputs = g.ndata['features'][input_nodes].to(device)
    batch_labels = g.ndata['labels'][seeds].to(device)
    batch_inputs = g.ndata["features"][input_nodes].to(device)
    batch_labels = g.ndata["labels"][seeds].to(device)
    return batch_inputs, batch_labels


@utils.benchmark('time', 600)
@utils.parametrize('data', ['reddit', 'ogbn-products'])
@utils.benchmark("time", 600)
@utils.parametrize("data", ["reddit", "ogbn-products"])
def track_time(data):
    data = utils.process_data(data)
    device = utils.get_bench_device()
    g = data[0]

    g.ndata['features'] = g.ndata['feat']
    g.ndata['labels'] = g.ndata['label']
    in_feats = g.ndata['features'].shape[1]
    g.ndata["features"] = g.ndata["feat"]
    g.ndata["labels"] = g.ndata["label"]
    in_feats = g.ndata["features"].shape[1]
    n_classes = data.num_classes

    # Create csr/coo/csc formats before launching training processes with multi-gpu.

...

@@ -65,7 +65,7 @@ def track_time(data):
    num_epochs = 20
    num_hidden = 16
    num_layers = 2
    fan_out = '10,25'
    fan_out = "10,25"
    batch_size = 1024
    lr = 0.003
    dropout = 0.5

...

@@ -73,11 +73,12 @@ def track_time(data):
    iter_start = 3
    iter_count = 10

    train_nid = th.nonzero(g.ndata['train_mask'], as_tuple=True)[0]
    train_nid = th.nonzero(g.ndata["train_mask"], as_tuple=True)[0]

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in fan_out.split(',')])
        [int(fanout) for fanout in fan_out.split(",")]
    )
    dataloader = dgl.dataloading.DataLoader(
        g,
        train_nid,

...

@@ -85,7 +86,8 @@ def track_time(data):
        batch_size=batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=num_workers)
        num_workers=num_workers,
    )

    # Define model and optimizer
    model = SAGE(in_feats, num_hidden, n_classes, num_layers, F.relu, dropout)

...

@@ -102,10 +104,10 @@ def track_time(data):
    for step, (input_nodes, seeds, blocks) in enumerate(dataloader):
        # Load the input features as well as output labels
        #batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
        # batch_inputs, batch_labels = load_subtensor(g, seeds, input_nodes, device)
        blocks = [block.int().to(device) for block in blocks]
        batch_inputs = blocks[0].srcdata['features']
        batch_labels = blocks[-1].dstdata['labels']
        batch_inputs = blocks[0].srcdata["features"]
        batch_labels = blocks[-1].dstdata["labels"]

        # Compute loss and prediction
        batch_pred = model(blocks, batch_inputs)

...

@@ -117,7 +119,9 @@ def track_time(data):
        # start timer at before iter_start
        if step == iter_start - 1:
            t0 = time.time()
        elif step == iter_count + iter_start - 1:  # time iter_count iterations
        elif (
            step == iter_count + iter_start - 1
        ):  # time iter_count iterations
            break

    t1 = time.time()

...
benchmarks/benchmarks/model_speed/bench_sage_unsupervised_ns.py

import time
import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import dgl
import dgl.function as fn
import dgl.nn.pytorch as dglnn
from .. import utils

...
benchmarks/benchmarks/multigpu/bench_multigpu_rgcn.py

...

@@ -13,18 +13,18 @@ import time
from pathlib import Path
from types import SimpleNamespace
import dgl
import numpy as np
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn import RelGraphConv
from torch.multiprocessing import Queue
from torch.nn.parallel import DistributedDataParallel
from torch.utils.data import DataLoader
import dgl
from dgl.nn import RelGraphConv
from .. import utils

# import sys

...
benchmarks/benchmarks/multigpu/bench_multigpu_sage.py

import argparse
import math
import time
from types import SimpleNamespace
from typing import NamedTuple
import dgl
import dgl.nn.pytorch as dglnn
import numpy as np
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.multiprocessing as mp
import dgl.nn.pytorch as dglnn
import time
import math
import argparse
from torch.nn.parallel import DistributedDataParallel
import dgl.nn.pytorch as dglnn
from .. import utils


class SAGE(nn.Module):
    def __init__(self, in_feats, n_hidden, n_classes, n_layers, activation, dropout):
    def __init__(
        self, in_feats, n_hidden, n_classes, n_layers, activation, dropout
    ):
        super().__init__()
        self.n_layers = n_layers
        self.n_hidden = n_hidden
        self.n_classes = n_classes
        self.layers = nn.ModuleList()
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, 'mean'))
        self.layers.append(dglnn.SAGEConv(in_feats, n_hidden, "mean"))
        for i in range(1, n_layers - 1):
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, 'mean'))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, 'mean'))
            self.layers.append(dglnn.SAGEConv(n_hidden, n_hidden, "mean"))
        self.layers.append(dglnn.SAGEConv(n_hidden, n_classes, "mean"))
        self.dropout = nn.Dropout(dropout)
        self.activation = activation

...

@@ -54,6 +51,7 @@ def load_subtensor(nfeat, labels, seeds, input_nodes, dev_id):
    batch_labels = labels[seeds].to(dev_id)
    return batch_inputs, batch_labels


# Entry point

...

@@ -61,35 +59,38 @@ def run(result_queue, proc_id, n_gpus, args, devices, data):
    dev_id = devices[proc_id]
    timing_records = []
    if n_gpus > 1:
        dist_init_method = 'tcp://{master_ip}:{master_port}'.format(master_ip='127.0.0.1', master_port='12345')
        dist_init_method = "tcp://{master_ip}:{master_port}".format(
            master_ip="127.0.0.1", master_port="12345"
        )
        world_size = n_gpus
        th.distributed.init_process_group(backend="nccl",
        th.distributed.init_process_group(
            backend="nccl",
            init_method=dist_init_method,
            world_size=world_size,
            rank=proc_id)
            rank=proc_id,
        )
    th.cuda.set_device(dev_id)

    n_classes, train_g, _, _ = data
    train_nfeat = train_g.ndata.pop('feat')
    train_labels = train_g.ndata.pop('label')
    train_nfeat = train_g.ndata.pop("feat")
    train_labels = train_g.ndata.pop("label")
    train_nfeat = train_nfeat.to(dev_id)
    train_labels = train_labels.to(dev_id)

    in_feats = train_nfeat.shape[1]

    train_mask = train_g.ndata['train_mask']
    train_mask = train_g.ndata["train_mask"]
    train_nid = train_mask.nonzero().squeeze()
    # Split train_nid
    train_nid = th.split(train_nid, math.ceil(len(train_nid) / n_gpus))[proc_id]

    # Create PyTorch DataLoader for constructing blocks
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [int(fanout) for fanout in args.fan_out.split(',')])
        [int(fanout) for fanout in args.fan_out.split(",")]
    )
    dataloader = dgl.dataloading.DataLoader(
        train_g,
        train_nid,

...

@@ -97,15 +98,23 @@ def run(result_queue, proc_id, n_gpus, args, devices, data):
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=False,
        num_workers=args.num_workers)
        num_workers=args.num_workers,
    )

    # Define model and optimizer
    model = SAGE(in_feats, args.num_hidden, n_classes, args.num_layers, F.relu, args.dropout)
    model = SAGE(
        in_feats,
        args.num_hidden,
        n_classes,
        args.num_layers,
        F.relu,
        args.dropout,
    )
    model = model.to(dev_id)
    if n_gpus > 1:
        model = DistributedDataParallel(model, device_ids=[dev_id], output_device=dev_id)
        model = DistributedDataParallel(
            model, device_ids=[dev_id], output_device=dev_id
        )
    loss_fcn = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

...

@@ -114,8 +123,9 @@ def run(result_queue, proc_id, n_gpus, args, devices, data):
        if proc_id == 0:
            tic_step = time.time()

        batch_inputs, batch_labels = load_subtensor(train_nfeat, train_labels, seeds, input_nodes, dev_id)
        batch_inputs, batch_labels = load_subtensor(
            train_nfeat, train_labels, seeds, input_nodes, dev_id
        )
        blocks = [block.int().to(dev_id) for block in blocks]

        batch_pred = model(blocks, batch_inputs)
        loss = loss_fcn(batch_pred, batch_labels)

...

@@ -135,18 +145,18 @@ def run(result_queue, proc_id, n_gpus, args, devices, data):
    result_queue.put(np.array(timing_records))


@utils.benchmark('time', timeout=600)
@utils.benchmark("time", timeout=600)
@utils.skip_if_not_4gpu()
@utils.parametrize('data', ['reddit', 'ogbn-products'])
@utils.parametrize("data", ["reddit", "ogbn-products"])
def track_time(data):
    args = SimpleNamespace(
        num_hidden=16,
        fan_out="10,25", batch_size=1000, lr=0.003, dropout=0.5, num_layers=2, num_workers=4,
        fan_out="10,25",
        batch_size=1000,
        lr=0.003,
        dropout=0.5,
        num_layers=2,
        num_workers=4,
    )
    devices = [0, 1, 2, 3]

...

@@ -167,8 +177,10 @@ def track_time(data):
    result_queue = mp.Queue()
    procs = []
    for proc_id in range(n_gpus):
        p = mp.Process(target=utils.thread_wrapped_func(run),
                       args=(result_queue, proc_id, n_gpus, args, devices, data))
        p = mp.Process(
            target=utils.thread_wrapped_func(run),
            args=(result_queue, proc_id, n_gpus, args, devices, data),
        )
        p.start()
        procs.append(p)
    for p in procs:

...

@@ -177,5 +189,5 @@ def track_time(data):
    num_exclude = 10  # exclude first 10 iterations
    if len(time_records) < 15:
        # exclude less if less records
        num_exclude = int(len(time_records) * 0.3)
    return np.mean(time_records[num_exclude:])
benchmarks/benchmarks/multigpu/rgcn_model.py

import dgl
import torch as th
import torch.nn as nn
import dgl


class BaseRGCN(nn.Module):
    def __init__(

...
benchmarks/benchmarks/rgcn.py

import dgl
import torch
import torch.nn as nn
import torch.nn.functional as F
import dgl
from dgl.nn.pytorch import RelGraphConv
from . import utils

...
benchmarks/benchmarks/utils.py

...

@@ -8,14 +8,14 @@ import zipfile
from functools import partial, reduce, wraps
from timeit import default_timer
import dgl
import numpy as np
import pandas
import requests
import torch
from ogb.nodeproppred import DglNodePropPredDataset
import dgl


def _download(url, path, filename):
    fn = os.path.join(path, filename)

...
benchmarks/scripts/replace_branch.py

...

@@ -20,7 +20,6 @@ def json_minify(string, strip_space=True):
    index = 0
    for match in re.finditer(tokenizer, string):
        if not (in_multi or in_single):
            tmp = string[index : match.start()]
            if not in_string and strip_space:

...