OpenDAS / dgl · Commit 704bcaf6 (Unverified)

examples (#5323)

Authored Feb 19, 2023 by Hongzhi (Steve), Chen; committed by GitHub, Feb 19, 2023.
Co-authored-by: Ubuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
Parent: 6bc82161

Changes: 332. Showing 20 changed files with 921 additions and 485 deletions (+921, -485).
Each file section below shows the file's updated contents; elided regions are marked with "..." and "@@" hunk headers.
Changed files:

  examples/pytorch/gat/train.py                    +2    -2
  examples/pytorch/gat/train_ppi.py                +57   -25
  examples/pytorch/gatv2/train.py                  +8    -5
  examples/pytorch/gcmc/data.py                    +420  -176
  examples/pytorch/gcmc/model.py                   +95   -78
  examples/pytorch/gcmc/train.py                   +7    -2
  examples/pytorch/gcmc/train_sampling.py          +271  -151
  examples/pytorch/gcn/train.py                    +3    -3
  examples/pytorch/geniepath/model.py              +1    -1
  examples/pytorch/geniepath/ppi.py                +2    -2
  examples/pytorch/geniepath/pubmed.py             +2    -2
  examples/pytorch/ggnn/data_utils.py              +3    -3
  examples/pytorch/ggnn/ggnn_gc.py                 +1    -1
  examples/pytorch/ggnn/ggnn_ns.py                 +2    -3
  examples/pytorch/ggnn/ggsnn.py                   +1    -1
  examples/pytorch/gin/train.py                    +2    -2
  examples/pytorch/gnn_explainer/explain_main.py   +10   -6
  examples/pytorch/gnn_explainer/models.py         +8    -6
  examples/pytorch/gnn_explainer/train_main.py     +25   -14
  examples/pytorch/grace/aug.py                    +1    -2
examples/pytorch/gat/train.py · View file @ 704bcaf6

import argparse

import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset

...
examples/pytorch/gat/train_ppi.py · View file @ 704bcaf6

import dgl.nn as dglnn
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data.ppi import PPIDataset
from dgl.dataloading import GraphDataLoader
from sklearn.metrics import f1_score


class GAT(nn.Module):
    def __init__(self, in_size, hid_size, out_size, heads):
        super().__init__()
        self.gat_layers = nn.ModuleList()
        # three-layer GAT
        self.gat_layers.append(
            dglnn.GATConv(in_size, hid_size, heads[0], activation=F.elu)
        )
        self.gat_layers.append(
            dglnn.GATConv(
                hid_size * heads[0],
                hid_size,
                heads[1],
                residual=True,
                activation=F.elu,
            )
        )
        self.gat_layers.append(
            dglnn.GATConv(
                hid_size * heads[1],
                out_size,
                heads[2],
                residual=True,
                activation=None,
            )
        )

    def forward(self, g, inputs):
        h = inputs
        for i, layer in enumerate(self.gat_layers):
            h = layer(g, h)
            if i == 2:  # last layer
                h = h.mean(1)
            else:  # other layer(s)
                h = h.flatten(1)
        return h


def evaluate(g, features, labels, model):
    model.eval()
    with torch.no_grad():
        output = model(g, features)
        pred = np.where(output.data.cpu().numpy() >= 0, 1, 0)
        score = f1_score(labels.data.cpu().numpy(), pred, average="micro")
        return score


def evaluate_in_batches(dataloader, device, model):
    total_score = 0
    for batch_id, batched_graph in enumerate(dataloader):
        batched_graph = batched_graph.to(device)
        features = batched_graph.ndata["feat"]
        labels = batched_graph.ndata["label"]
        score = evaluate(batched_graph, features, labels, model)
        total_score += score
    return total_score / (batch_id + 1)  # return average score


def train(train_dataloader, val_dataloader, device, model):
    # define loss function and optimizer
...
@@ -57,44 +79,54 @@ def train(train_dataloader, val_dataloader, device, model):
        # mini-batch loop
        for batch_id, batched_graph in enumerate(train_dataloader):
            batched_graph = batched_graph.to(device)
            features = batched_graph.ndata["feat"].float()
            labels = batched_graph.ndata["label"].float()
            logits = model(batched_graph, features)
            loss = loss_fcn(logits, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(
            "Epoch {:05d} | Loss {:.4f} |".format(
                epoch, total_loss / (batch_id + 1)
            )
        )
        if (epoch + 1) % 5 == 0:
            avg_score = evaluate_in_batches(
                val_dataloader, device, model
            )  # evaluate F1-score instead of loss
            print(" Acc. (F1-score) {:.4f} ".format(avg_score))


if __name__ == "__main__":
    print(f"Training PPI Dataset with DGL built-in GATConv module.")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # load and preprocess datasets
    train_dataset = PPIDataset(mode="train")
    val_dataset = PPIDataset(mode="valid")
    test_dataset = PPIDataset(mode="test")
    features = train_dataset[0].ndata["feat"]

    # create GAT model
    in_size = features.shape[1]
    out_size = train_dataset.num_labels
    model = GAT(in_size, 256, out_size, heads=[4, 4, 6]).to(device)

    # model training
    print("Training...")
    train_dataloader = GraphDataLoader(train_dataset, batch_size=2)
    val_dataloader = GraphDataLoader(val_dataset, batch_size=2)
    train(train_dataloader, val_dataloader, device, model)

    # test the model
    print("Testing...")
    test_dataloader = GraphDataLoader(test_dataset, batch_size=2)
    avg_score = evaluate_in_batches(test_dataloader, device, model)
    print("Test Accuracy (F1-score) {:.4f}".format(avg_score))
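
A quick way to sanity-check the three-layer GAT above is to run it on a toy graph. The sketch below is illustrative only (the graph, feature width, and label count are made up; it just mirrors PPI's 121 labels) and assumes the GAT class defined in this file:

# Illustrative sketch, not part of the commit: exercise GAT on a random
# toy graph. Self-loops avoid GATConv's zero-in-degree error.
import dgl
import torch

g = dgl.add_self_loop(dgl.rand_graph(10, 40))  # 10 nodes, 40 random edges
feats = torch.randn(10, 50)                    # 50-dim node features
model = GAT(in_size=50, hid_size=256, out_size=121, heads=[4, 4, 6])
out = model(g, feats)  # heads concatenated in hidden layers, averaged at the end
print(out.shape)       # torch.Size([10, 121])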
examples/pytorch/gatv2/train.py · View file @ 704bcaf6

...
@@ -6,15 +6,19 @@ Multiple heads are also batched together for faster training.
import argparse
import time

import dgl
import numpy as np
import torch
import torch.nn.functional as F
from dgl.data import (
    CiteseerGraphDataset,
    CoraGraphDataset,
    PubmedGraphDataset,
    register_data_args,
)
from gatv2 import GATv2


class EarlyStopping:
    def __init__(self, patience=10):
...
@@ -180,7 +184,6 @@ def main(args):
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="GAT")
    register_data_args(parser)
    parser.add_argument(
...
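
The body of EarlyStopping is elided in this hunk. For orientation, a typical patience-based stopper looks like the hypothetical sketch below (names and logic are mine, not the file's):

# Hypothetical sketch of patience-based early stopping; not the code from
# gatv2/train.py, whose implementation is elided above.
class SimpleEarlyStopping:
    def __init__(self, patience=10):
        self.patience = patience
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def step(self, score):
        if self.best_score is None or score > self.best_score:
            self.best_score = score  # improvement: remember it, reset counter
            self.counter = 0
        else:
            self.counter += 1        # no improvement this epoch
            if self.counter >= self.patience:
                self.early_stop = True
        return self.early_stop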
examples/pytorch/gcmc/data.py · View file @ 704bcaf6

"""MovieLens dataset"""
import os
import re

import dgl
import numpy as np
import pandas as pd
import scipy.sparse as sp
import torch as th
from dgl.data.utils import download, extract_archive, get_download_dir

from utils import to_etype_name

_urls = {
    "ml-100k": "http://files.grouplens.org/datasets/movielens/ml-100k.zip",
    "ml-1m": "http://files.grouplens.org/datasets/movielens/ml-1m.zip",
    "ml-10m": "http://files.grouplens.org/datasets/movielens/ml-10m.zip",
}

READ_DATASET_PATH = get_download_dir()
GENRES_ML_100K = [
    "unknown",
    "Action",
    "Adventure",
    "Animation",
    "Children",
    "Comedy",
    "Crime",
    "Documentary",
    "Drama",
    "Fantasy",
    "Film-Noir",
    "Horror",
    "Musical",
    "Mystery",
    "Romance",
    "Sci-Fi",
    "Thriller",
    "War",
    "Western",
]
GENRES_ML_1M = GENRES_ML_100K[1:]
GENRES_ML_10M = GENRES_ML_100K + ["IMAX"]


class MovieLens(object):
    """MovieLens dataset used by GCMC model
...
@@ -97,9 +114,17 @@ class MovieLens(object):
        Ratio of validation data
    """

    def __init__(
        self,
        name,
        device,
        mix_cpu_gpu=False,
        use_one_hot_fea=False,
        symm=True,
        test_ratio=0.1,
        valid_ratio=0.1,
    ):
        self._name = name
        self._device = device
        self._symm = symm
...
@@ -107,57 +132,106 @@ class MovieLens(object):
        self._valid_ratio = valid_ratio
        # download and extract
        download_dir = get_download_dir()
        zip_file_path = "{}/{}.zip".format(download_dir, name)
        download(_urls[name], path=zip_file_path)
        extract_archive(zip_file_path, "{}/{}".format(download_dir, name))
        if name == "ml-10m":
            root_folder = "ml-10M100K"
        else:
            root_folder = name
        self._dir = os.path.join(download_dir, name, root_folder)
        print("Starting processing {} ...".format(self._name))
        self._load_raw_user_info()
        self._load_raw_movie_info()
        print("......")
        if self._name == "ml-100k":
            self.all_train_rating_info = self._load_raw_rates(
                os.path.join(self._dir, "u1.base"), "\t"
            )
            self.test_rating_info = self._load_raw_rates(
                os.path.join(self._dir, "u1.test"), "\t"
            )
            self.all_rating_info = pd.concat(
                [self.all_train_rating_info, self.test_rating_info]
            )
        elif self._name == "ml-1m" or self._name == "ml-10m":
            self.all_rating_info = self._load_raw_rates(
                os.path.join(self._dir, "ratings.dat"), "::"
            )
            num_test = int(
                np.ceil(self.all_rating_info.shape[0] * self._test_ratio)
            )
            shuffled_idx = np.random.permutation(self.all_rating_info.shape[0])
            self.test_rating_info = self.all_rating_info.iloc[
                shuffled_idx[:num_test]
            ]
            self.all_train_rating_info = self.all_rating_info.iloc[
                shuffled_idx[num_test:]
            ]
        else:
            raise NotImplementedError
        print("......")
        num_valid = int(
            np.ceil(self.all_train_rating_info.shape[0] * self._valid_ratio)
        )
        shuffled_idx = np.random.permutation(
            self.all_train_rating_info.shape[0]
        )
        self.valid_rating_info = self.all_train_rating_info.iloc[
            shuffled_idx[:num_valid]
        ]
        self.train_rating_info = self.all_train_rating_info.iloc[
            shuffled_idx[num_valid:]
        ]
        self.possible_rating_values = np.unique(
            self.train_rating_info["rating"].values
        )

        print("All rating pairs : {}".format(self.all_rating_info.shape[0]))
        print(
            "\tAll train rating pairs : {}".format(
                self.all_train_rating_info.shape[0]
            )
        )
        print(
            "\t\tTrain rating pairs : {}".format(
                self.train_rating_info.shape[0]
            )
        )
        print(
            "\t\tValid rating pairs : {}".format(
                self.valid_rating_info.shape[0]
            )
        )
        print(
            "\tTest rating pairs : {}".format(self.test_rating_info.shape[0])
        )

        self.user_info = self._drop_unseen_nodes(
            orign_info=self.user_info,
            cmp_col_name="id",
            reserved_ids_set=set(self.all_rating_info["user_id"].values),
            label="user",
        )
        self.movie_info = self._drop_unseen_nodes(
            orign_info=self.movie_info,
            cmp_col_name="id",
            reserved_ids_set=set(self.all_rating_info["movie_id"].values),
            label="movie",
        )

        # Map user/movie to the global id
        self.global_user_id_map = {
            ele: i for i, ele in enumerate(self.user_info["id"])
        }
        self.global_movie_id_map = {
            ele: i for i, ele in enumerate(self.movie_info["id"])
        }
        print(
            "Total user number = {}, movie number = {}".format(
                len(self.global_user_id_map), len(self.global_movie_id_map)
            )
        )
        self._num_user = len(self.global_user_id_map)
        self._num_movie = len(self.global_movie_id_map)
...
@@ -171,8 +245,12 @@ class MovieLens(object):
            self.user_feature = th.FloatTensor(self._process_user_fea())
            self.movie_feature = th.FloatTensor(self._process_movie_fea())
        else:
            self.user_feature = th.FloatTensor(self._process_user_fea()).to(
                self._device
            )
            self.movie_feature = th.FloatTensor(
                self._process_movie_fea()
            ).to(self._device)
        if self.user_feature is None:
            self.user_feature_shape = (self.num_user, self.num_user)
            self.movie_feature_shape = (self.num_movie, self.num_movie)
...
@@ -184,16 +262,29 @@ class MovieLens(object):
        info_line += "\nmovie: {}".format(self.movie_feature_shape)
        print(info_line)

        (
            all_train_rating_pairs,
            all_train_rating_values,
        ) = self._generate_pair_value(self.all_train_rating_info)
        train_rating_pairs, train_rating_values = self._generate_pair_value(
            self.train_rating_info
        )
        valid_rating_pairs, valid_rating_values = self._generate_pair_value(
            self.valid_rating_info
        )
        test_rating_pairs, test_rating_values = self._generate_pair_value(
            self.test_rating_info
        )

        def _make_labels(ratings):
            labels = th.LongTensor(
                np.searchsorted(self.possible_rating_values, ratings)
            ).to(device)
            return labels

        self.train_enc_graph = self._generate_enc_graph(
            train_rating_pairs, train_rating_values, add_support=True
        )
        self.train_dec_graph = self._generate_dec_graph(train_rating_pairs)
        self.train_labels = _make_labels(train_rating_values)
        self.train_truths = th.FloatTensor(train_rating_values).to(device)
...
@@ -203,7 +294,9 @@ class MovieLens(object):
        self.valid_labels = _make_labels(valid_rating_values)
        self.valid_truths = th.FloatTensor(valid_rating_values).to(device)

        self.test_enc_graph = self._generate_enc_graph(
            all_train_rating_pairs, all_train_rating_values, add_support=True
        )
        self.test_dec_graph = self._generate_dec_graph(test_rating_pairs)
        self.test_labels = _make_labels(test_rating_values)
        self.test_truths = th.FloatTensor(test_rating_values).to(device)
...
@@ -215,71 +308,118 @@ class MovieLens(object):
                rst += graph.number_of_edges(str(r))
            return rst

        print(
            "Train enc graph: \t#user:{}\t#movie:{}\t#pairs:{}".format(
                self.train_enc_graph.number_of_nodes("user"),
                self.train_enc_graph.number_of_nodes("movie"),
                _npairs(self.train_enc_graph),
            )
        )
        print(
            "Train dec graph: \t#user:{}\t#movie:{}\t#pairs:{}".format(
                self.train_dec_graph.number_of_nodes("user"),
                self.train_dec_graph.number_of_nodes("movie"),
                self.train_dec_graph.number_of_edges(),
            )
        )
        print(
            "Valid enc graph: \t#user:{}\t#movie:{}\t#pairs:{}".format(
                self.valid_enc_graph.number_of_nodes("user"),
                self.valid_enc_graph.number_of_nodes("movie"),
                _npairs(self.valid_enc_graph),
            )
        )
        print(
            "Valid dec graph: \t#user:{}\t#movie:{}\t#pairs:{}".format(
                self.valid_dec_graph.number_of_nodes("user"),
                self.valid_dec_graph.number_of_nodes("movie"),
                self.valid_dec_graph.number_of_edges(),
            )
        )
        print(
            "Test enc graph: \t#user:{}\t#movie:{}\t#pairs:{}".format(
                self.test_enc_graph.number_of_nodes("user"),
                self.test_enc_graph.number_of_nodes("movie"),
                _npairs(self.test_enc_graph),
            )
        )
        print(
            "Test dec graph: \t#user:{}\t#movie:{}\t#pairs:{}".format(
                self.test_dec_graph.number_of_nodes("user"),
                self.test_dec_graph.number_of_nodes("movie"),
                self.test_dec_graph.number_of_edges(),
            )
        )

    def _generate_pair_value(self, rating_info):
        rating_pairs = (
            np.array(
                [
                    self.global_user_id_map[ele]
                    for ele in rating_info["user_id"]
                ],
                dtype=np.int64,
            ),
            np.array(
                [
                    self.global_movie_id_map[ele]
                    for ele in rating_info["movie_id"]
                ],
                dtype=np.int64,
            ),
        )
        rating_values = rating_info["rating"].values.astype(np.float32)
        return rating_pairs, rating_values

    def _generate_enc_graph(
        self, rating_pairs, rating_values, add_support=False
    ):
        user_movie_R = np.zeros(
            (self._num_user, self._num_movie), dtype=np.float32
        )
        user_movie_R[rating_pairs] = rating_values

        data_dict = dict()
        num_nodes_dict = {"user": self._num_user, "movie": self._num_movie}
        rating_row, rating_col = rating_pairs
        for rating in self.possible_rating_values:
            ridx = np.where(rating_values == rating)
            rrow = rating_row[ridx]
            rcol = rating_col[ridx]
            rating = to_etype_name(rating)
            data_dict.update(
                {
                    ("user", str(rating), "movie"): (rrow, rcol),
                    ("movie", "rev-%s" % str(rating), "user"): (rcol, rrow),
                }
            )
        graph = dgl.heterograph(data_dict, num_nodes_dict=num_nodes_dict)

        # sanity check
        assert (
            len(rating_pairs[0])
            == sum([graph.number_of_edges(et) for et in graph.etypes]) // 2
        )

        if add_support:

            def _calc_norm(x):
                x = x.numpy().astype("float32")
                x[x == 0.0] = np.inf
                x = th.FloatTensor(1.0 / np.sqrt(x))
                return x.unsqueeze(1)

            user_ci = []
            user_cj = []
            movie_ci = []
            movie_cj = []
            for r in self.possible_rating_values:
                r = to_etype_name(r)
                user_ci.append(graph["rev-%s" % r].in_degrees())
                movie_ci.append(graph[r].in_degrees())
                if self._symm:
                    user_cj.append(graph[r].out_degrees())
                    movie_cj.append(graph["rev-%s" % r].out_degrees())
                else:
                    user_cj.append(th.zeros((self.num_user,)))
                    movie_cj.append(th.zeros((self.num_movie,)))
...
@@ -289,10 +429,14 @@ class MovieLens(object):
                user_cj = _calc_norm(sum(user_cj))
                movie_cj = _calc_norm(sum(movie_cj))
            else:
                user_cj = th.ones(
                    self.num_user,
                )
                movie_cj = th.ones(
                    self.num_movie,
                )
            graph.nodes["user"].data.update({"ci": user_ci, "cj": user_cj})
            graph.nodes["movie"].data.update({"ci": movie_ci, "cj": movie_cj})

        return graph
...
@@ -300,10 +444,16 @@ class MovieLens(object):
        ones = np.ones_like(rating_pairs[0])
        user_movie_ratings_coo = sp.coo_matrix(
            (ones, rating_pairs),
            shape=(self.num_user, self.num_movie),
            dtype=np.float32,
        )
        g = dgl.bipartite_from_scipy(
            user_movie_ratings_coo, utype="_U", etype="_E", vtype="_V"
        )
        return dgl.heterograph(
            {("user", "rate", "movie"): g.edges()},
            num_nodes_dict={"user": self.num_user, "movie": self.num_movie},
        )

    @property
    def num_links(self):
...
@@ -317,15 +467,24 @@ class MovieLens(object):
    def num_movie(self):
        return self._num_movie

    def _drop_unseen_nodes(
        self, orign_info, cmp_col_name, reserved_ids_set, label
    ):
        # print("  -----------------")
        # print("{}: {}(reserved) v.s. {}(from info)".format(label, len(reserved_ids_set),
        #                                                    len(set(orign_info[cmp_col_name].values))))
        if reserved_ids_set != set(orign_info[cmp_col_name].values):
            pd_rating_ids = pd.DataFrame(
                list(reserved_ids_set), columns=["id_graph"]
            )
            # print("\torign_info: ({}, {})".format(orign_info.shape[0], orign_info.shape[1]))
            data_info = orign_info.merge(
                pd_rating_ids,
                left_on=cmp_col_name,
                right_on="id_graph",
                how="outer",
            )
            data_info = data_info.dropna(subset=[cmp_col_name, "id_graph"])
            data_info = data_info.drop(columns=["id_graph"])
            data_info = data_info.reset_index(drop=True)
            # print("\tAfter dropping, data shape: ({}, {})".format(data_info.shape[0], data_info.shape[1]))
...
@@ -354,10 +513,18 @@ class MovieLens(object):
        rating_info : pd.DataFrame
        """
        rating_info = pd.read_csv(
            file_path,
            sep=sep,
            header=None,
            names=["user_id", "movie_id", "rating", "timestamp"],
            dtype={
                "user_id": np.int32,
                "movie_id": np.int32,
                "ratings": np.float32,
                "timestamp": np.int64,
            },
            engine="python",
        )
        return rating_info

    def _load_raw_user_info(self):
...
@@ -379,20 +546,40 @@ class MovieLens(object):
        -------
        user_info : pd.DataFrame
        """
        if self._name == "ml-100k":
            self.user_info = pd.read_csv(
                os.path.join(self._dir, "u.user"),
                sep="|",
                header=None,
                names=["id", "age", "gender", "occupation", "zip_code"],
                engine="python",
            )
        elif self._name == "ml-1m":
            self.user_info = pd.read_csv(
                os.path.join(self._dir, "users.dat"),
                sep="::",
                header=None,
                names=["id", "gender", "age", "occupation", "zip_code"],
                engine="python",
            )
        elif self._name == "ml-10m":
            rating_info = pd.read_csv(
                os.path.join(self._dir, "ratings.dat"),
                sep="::",
                header=None,
                names=["user_id", "movie_id", "rating", "timestamp"],
                dtype={
                    "user_id": np.int32,
                    "movie_id": np.int32,
                    "ratings": np.float32,
                    "timestamp": np.int64,
                },
                engine="python",
            )
            self.user_info = pd.DataFrame(
                np.unique(rating_info["user_id"].values.astype(np.int32)),
                columns=["id"],
            )
        else:
            raise NotImplementedError
...
@@ -412,20 +599,36 @@ class MovieLens(object):
        user_features : np.ndarray
        """
        if self._name == "ml-100k" or self._name == "ml-1m":
            ages = self.user_info["age"].values.astype(np.float32)
            gender = (self.user_info["gender"] == "F").values.astype(
                np.float32
            )
            all_occupations = set(self.user_info["occupation"])
            occupation_map = {ele: i for i, ele in enumerate(all_occupations)}
            occupation_one_hot = np.zeros(
                shape=(self.user_info.shape[0], len(all_occupations)),
                dtype=np.float32,
            )
            occupation_one_hot[
                np.arange(self.user_info.shape[0]),
                np.array(
                    [
                        occupation_map[ele]
                        for ele in self.user_info["occupation"]
                    ]
                ),
            ] = 1
            user_features = np.concatenate(
                [
                    ages.reshape((self.user_info.shape[0], 1)) / 50.0,
                    gender.reshape((self.user_info.shape[0], 1)),
                    occupation_one_hot,
                ],
                axis=1,
            )
        elif self._name == "ml-10m":
            user_features = np.zeros(
                shape=(self.user_info.shape[0], 1), dtype=np.float32
            )
        else:
            raise NotImplementedError
        return user_features
...
@@ -453,35 +656,57 @@ class MovieLens(object):
        For ml-100k, the column name is ['id', 'title', 'release_date', 'video_release_date', 'url'] + [GENRES (19)]]
        For ml-1m and ml-10m, the column name is ['id', 'title'] + [GENRES (18/20)]]
        """
        if self._name == "ml-100k":
            GENRES = GENRES_ML_100K
        elif self._name == "ml-1m":
            GENRES = GENRES_ML_1M
        elif self._name == "ml-10m":
            GENRES = GENRES_ML_10M
        else:
            raise NotImplementedError

        if self._name == "ml-100k":
            file_path = os.path.join(self._dir, "u.item")
            self.movie_info = pd.read_csv(
                file_path,
                sep="|",
                header=None,
                names=[
                    "id",
                    "title",
                    "release_date",
                    "video_release_date",
                    "url",
                ]
                + GENRES,
                encoding="iso-8859-1",
            )
        elif self._name == "ml-1m" or self._name == "ml-10m":
            file_path = os.path.join(self._dir, "movies.dat")
            movie_info = pd.read_csv(
                file_path,
                sep="::",
                header=None,
                names=["id", "title", "genres"],
                encoding="iso-8859-1",
            )
            genre_map = {ele: i for i, ele in enumerate(GENRES)}
            genre_map["Children's"] = genre_map["Children"]
            genre_map["Childrens"] = genre_map["Children"]
            movie_genres = np.zeros(
                shape=(movie_info.shape[0], len(GENRES)), dtype=np.float32
            )
            for i, genres in enumerate(movie_info["genres"]):
                for ele in genres.split("|"):
                    if ele in genre_map:
                        movie_genres[i, genre_map[ele]] = 1.0
                    else:
                        print(
                            "genres not found, filled with unknown: {}".format(
                                genres
                            )
                        )
                        movie_genres[i, genre_map["unknown"]] = 1.0
            for idx, genre_name in enumerate(GENRES):
                assert idx == genre_map[genre_name]
                movie_info[genre_name] = movie_genres[:, idx]
...
@@ -506,39 +731,58 @@ class MovieLens(object):
        import torchtext
        from torchtext.data.utils import get_tokenizer

        if self._name == "ml-100k":
            GENRES = GENRES_ML_100K
        elif self._name == "ml-1m":
            GENRES = GENRES_ML_1M
        elif self._name == "ml-10m":
            GENRES = GENRES_ML_10M
        else:
            raise NotImplementedError

        # Old torchtext-legacy API commented below
        # TEXT = torchtext.legacy.data.Field(tokenize='spacy', tokenizer_language='en_core_web_sm')
        tokenizer = get_tokenizer(
            "spacy", language="en_core_web_sm"
        )  # new API (torchtext 0.9+)
        embedding = torchtext.vocab.GloVe(name="840B", dim=300)

        title_embedding = np.zeros(
            shape=(self.movie_info.shape[0], 300), dtype=np.float32
        )
        release_years = np.zeros(
            shape=(self.movie_info.shape[0], 1), dtype=np.float32
        )
        p = re.compile(r"(.+)\s*\((\d+)\)")
        for i, title in enumerate(self.movie_info["title"]):
            match_res = p.match(title)
            if match_res is None:
                print(
                    "{} cannot be matched, index={}, name={}".format(
                        title, i, self._name
                    )
                )
                title_context, year = title, 1950
            else:
                title_context, year = match_res.groups()
            # We use average of glove
            # Upgraded torchtext API: TEXT.tokenize(title_context) --> tokenizer(title_context)
            title_embedding[i, :] = (
                embedding.get_vecs_by_tokens(tokenizer(title_context))
                .numpy()
                .mean(axis=0)
            )
            release_years[i] = float(year)
        movie_features = np.concatenate(
            (
                title_embedding,
                (release_years - 1950.0) / 100.0,
                self.movie_info[GENRES],
            ),
            axis=1,
        )
        return movie_features


if __name__ == "__main__":
    MovieLens("ml-100k", device=th.device("cpu"), symm=True)
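
The encoder graph built by _generate_enc_graph treats each distinct rating value as its own edge type, plus a reversed "rev-*" type over the same user/movie node sets. A minimal illustration of that layout (toy ids; my own example rather than the commit's code):

# Toy illustration of the heterograph layout used above: one relation per
# rating value and its reverse, over shared user/movie node sets.
import dgl
import numpy as np

rrow = np.array([0, 1, 1])  # users who gave rating "5"
rcol = np.array([0, 0, 2])  # movies they rated
enc = dgl.heterograph(
    {
        ("user", "5", "movie"): (rrow, rcol),
        ("movie", "rev-5", "user"): (rcol, rrow),
    },
    num_nodes_dict={"user": 2, "movie": 3},
)
print(enc.number_of_edges("5"))      # 3
print(enc.number_of_edges("rev-5"))  # 3, same pairs in reverse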
examples/pytorch/gcmc/model.py
View file @
704bcaf6
"""NN modules"""
"""NN modules"""
import
dgl.function
as
fn
import
dgl.nn.pytorch
as
dglnn
import
torch
as
th
import
torch
as
th
import
torch.nn
as
nn
import
torch.nn
as
nn
from
torch.nn
import
init
from
torch.nn
import
init
import
dgl.function
as
fn
import
dgl.nn.pytorch
as
dglnn
from
utils
import
get_activation
,
to_etype_name
from
utils
import
get_activation
,
to_etype_name
class
GCMCGraphConv
(
nn
.
Module
):
class
GCMCGraphConv
(
nn
.
Module
):
"""Graph convolution module used in the GCMC model.
"""Graph convolution module used in the GCMC model.
...
@@ -23,12 +24,10 @@ class GCMCGraphConv(nn.Module):
...
@@ -23,12 +24,10 @@ class GCMCGraphConv(nn.Module):
Which device to put data in. Useful in mix_cpu_gpu training and
Which device to put data in. Useful in mix_cpu_gpu training and
multi-gpu training
multi-gpu training
"""
"""
def
__init__
(
self
,
in_feats
,
def
__init__
(
out_feats
,
self
,
in_feats
,
out_feats
,
weight
=
True
,
device
=
None
,
dropout_rate
=
0.0
weight
=
True
,
):
device
=
None
,
dropout_rate
=
0.0
):
super
(
GCMCGraphConv
,
self
).
__init__
()
super
(
GCMCGraphConv
,
self
).
__init__
()
self
.
_in_feats
=
in_feats
self
.
_in_feats
=
in_feats
self
.
_out_feats
=
out_feats
self
.
_out_feats
=
out_feats
...
@@ -38,7 +37,7 @@ class GCMCGraphConv(nn.Module):
...
@@ -38,7 +37,7 @@ class GCMCGraphConv(nn.Module):
if
weight
:
if
weight
:
self
.
weight
=
nn
.
Parameter
(
th
.
Tensor
(
in_feats
,
out_feats
))
self
.
weight
=
nn
.
Parameter
(
th
.
Tensor
(
in_feats
,
out_feats
))
else
:
else
:
self
.
register_parameter
(
'
weight
'
,
None
)
self
.
register_parameter
(
"
weight
"
,
None
)
self
.
reset_parameters
()
self
.
reset_parameters
()
def
reset_parameters
(
self
):
def
reset_parameters
(
self
):
...
@@ -70,17 +69,19 @@ class GCMCGraphConv(nn.Module):
...
@@ -70,17 +69,19 @@ class GCMCGraphConv(nn.Module):
"""
"""
with
graph
.
local_scope
():
with
graph
.
local_scope
():
if
isinstance
(
feat
,
tuple
):
if
isinstance
(
feat
,
tuple
):
feat
,
_
=
feat
# dst feature not used
feat
,
_
=
feat
# dst feature not used
cj
=
graph
.
srcdata
[
'
cj
'
]
cj
=
graph
.
srcdata
[
"
cj
"
]
ci
=
graph
.
dstdata
[
'
ci
'
]
ci
=
graph
.
dstdata
[
"
ci
"
]
if
self
.
device
is
not
None
:
if
self
.
device
is
not
None
:
cj
=
cj
.
to
(
self
.
device
)
cj
=
cj
.
to
(
self
.
device
)
ci
=
ci
.
to
(
self
.
device
)
ci
=
ci
.
to
(
self
.
device
)
if
weight
is
not
None
:
if
weight
is
not
None
:
if
self
.
weight
is
not
None
:
if
self
.
weight
is
not
None
:
raise
DGLError
(
'External weight is provided while at the same time the'
raise
DGLError
(
' module has defined its own weight parameter. Please'
"External weight is provided while at the same time the"
' create the module with flag weight=False.'
)
" module has defined its own weight parameter. Please"
" create the module with flag weight=False."
)
else
:
else
:
weight
=
self
.
weight
weight
=
self
.
weight
...
@@ -88,14 +89,16 @@ class GCMCGraphConv(nn.Module):
...
@@ -88,14 +89,16 @@ class GCMCGraphConv(nn.Module):
feat
=
dot_or_identity
(
feat
,
weight
,
self
.
device
)
feat
=
dot_or_identity
(
feat
,
weight
,
self
.
device
)
feat
=
feat
*
self
.
dropout
(
cj
)
feat
=
feat
*
self
.
dropout
(
cj
)
graph
.
srcdata
[
'h'
]
=
feat
graph
.
srcdata
[
"h"
]
=
feat
graph
.
update_all
(
fn
.
copy_u
(
u
=
'h'
,
out
=
'm'
),
graph
.
update_all
(
fn
.
sum
(
msg
=
'm'
,
out
=
'h'
))
fn
.
copy_u
(
u
=
"h"
,
out
=
"m"
),
fn
.
sum
(
msg
=
"m"
,
out
=
"h"
)
rst
=
graph
.
dstdata
[
'h'
]
)
rst
=
graph
.
dstdata
[
"h"
]
rst
=
rst
*
ci
rst
=
rst
*
ci
return
rst
return
rst
class
GCMCLayer
(
nn
.
Module
):
class
GCMCLayer
(
nn
.
Module
):
r
"""GCMC layer
r
"""GCMC layer
...
@@ -144,18 +147,21 @@ class GCMCLayer(nn.Module):
...
@@ -144,18 +147,21 @@ class GCMCLayer(nn.Module):
Which device to put data in. Useful in mix_cpu_gpu training and
Which device to put data in. Useful in mix_cpu_gpu training and
multi-gpu training
multi-gpu training
"""
"""
def
__init__
(
self
,
rating_vals
,
def
__init__
(
user_in_units
,
self
,
movie_in_units
,
rating_vals
,
msg_units
,
user_in_units
,
out_units
,
movie_in_units
,
dropout_rate
=
0.0
,
msg_units
,
agg
=
'stack'
,
# or 'sum'
out_units
,
agg_act
=
None
,
dropout_rate
=
0.0
,
out_act
=
None
,
agg
=
"stack"
,
# or 'sum'
share_user_item_param
=
False
,
agg_act
=
None
,
device
=
None
):
out_act
=
None
,
share_user_item_param
=
False
,
device
=
None
,
):
super
(
GCMCLayer
,
self
).
__init__
()
super
(
GCMCLayer
,
self
).
__init__
()
self
.
rating_vals
=
rating_vals
self
.
rating_vals
=
rating_vals
self
.
agg
=
agg
self
.
agg
=
agg
...
@@ -165,7 +171,7 @@ class GCMCLayer(nn.Module):
...
@@ -165,7 +171,7 @@ class GCMCLayer(nn.Module):
self
.
ifc
=
self
.
ufc
self
.
ifc
=
self
.
ufc
else
:
else
:
self
.
ifc
=
nn
.
Linear
(
msg_units
,
out_units
)
self
.
ifc
=
nn
.
Linear
(
msg_units
,
out_units
)
if
agg
==
'
stack
'
:
if
agg
==
"
stack
"
:
# divide the original msg unit size by number of ratings to keep
# divide the original msg unit size by number of ratings to keep
# the dimensionality
# the dimensionality
assert
msg_units
%
len
(
rating_vals
)
==
0
assert
msg_units
%
len
(
rating_vals
)
==
0
...
@@ -176,32 +182,42 @@ class GCMCLayer(nn.Module):
...
@@ -176,32 +182,42 @@ class GCMCLayer(nn.Module):
for
rating
in
rating_vals
:
for
rating
in
rating_vals
:
# PyTorch parameter name can't contain "."
# PyTorch parameter name can't contain "."
rating
=
to_etype_name
(
rating
)
rating
=
to_etype_name
(
rating
)
rev_rating
=
'
rev-%s
'
%
rating
rev_rating
=
"
rev-%s
"
%
rating
if
share_user_item_param
and
user_in_units
==
movie_in_units
:
if
share_user_item_param
and
user_in_units
==
movie_in_units
:
self
.
W_r
[
rating
]
=
nn
.
Parameter
(
th
.
randn
(
user_in_units
,
msg_units
))
self
.
W_r
[
rating
]
=
nn
.
Parameter
(
self
.
W_r
[
'rev-%s'
%
rating
]
=
self
.
W_r
[
rating
]
th
.
randn
(
user_in_units
,
msg_units
)
subConv
[
rating
]
=
GCMCGraphConv
(
user_in_units
,
)
msg_units
,
self
.
W_r
[
"rev-%s"
%
rating
]
=
self
.
W_r
[
rating
]
weight
=
False
,
subConv
[
rating
]
=
GCMCGraphConv
(
device
=
device
,
user_in_units
,
dropout_rate
=
dropout_rate
)
msg_units
,
subConv
[
rev_rating
]
=
GCMCGraphConv
(
user_in_units
,
weight
=
False
,
msg_units
,
device
=
device
,
weight
=
False
,
dropout_rate
=
dropout_rate
,
device
=
device
,
)
dropout_rate
=
dropout_rate
)
subConv
[
rev_rating
]
=
GCMCGraphConv
(
user_in_units
,
msg_units
,
weight
=
False
,
device
=
device
,
dropout_rate
=
dropout_rate
,
)
else
:
else
:
self
.
W_r
=
None
self
.
W_r
=
None
subConv
[
rating
]
=
GCMCGraphConv
(
user_in_units
,
subConv
[
rating
]
=
GCMCGraphConv
(
msg_units
,
user_in_units
,
weight
=
True
,
msg_units
,
device
=
device
,
weight
=
True
,
dropout_rate
=
dropout_rate
)
device
=
device
,
subConv
[
rev_rating
]
=
GCMCGraphConv
(
movie_in_units
,
dropout_rate
=
dropout_rate
,
msg_units
,
)
weight
=
True
,
subConv
[
rev_rating
]
=
GCMCGraphConv
(
device
=
device
,
movie_in_units
,
dropout_rate
=
dropout_rate
)
msg_units
,
weight
=
True
,
device
=
device
,
dropout_rate
=
dropout_rate
,
)
self
.
conv
=
dglnn
.
HeteroGraphConv
(
subConv
,
aggregate
=
agg
)
self
.
conv
=
dglnn
.
HeteroGraphConv
(
subConv
,
aggregate
=
agg
)
self
.
agg_act
=
get_activation
(
agg_act
)
self
.
agg_act
=
get_activation
(
agg_act
)
self
.
out_act
=
get_activation
(
out_act
)
self
.
out_act
=
get_activation
(
out_act
)
...
@@ -248,16 +264,20 @@ class GCMCLayer(nn.Module):
...
@@ -248,16 +264,20 @@ class GCMCLayer(nn.Module):
new_ifeat : torch.Tensor
new_ifeat : torch.Tensor
New movie features
New movie features
"""
"""
in_feats
=
{
'
user
'
:
ufeat
,
'
movie
'
:
ifeat
}
in_feats
=
{
"
user
"
:
ufeat
,
"
movie
"
:
ifeat
}
mod_args
=
{}
mod_args
=
{}
for
i
,
rating
in
enumerate
(
self
.
rating_vals
):
for
i
,
rating
in
enumerate
(
self
.
rating_vals
):
rating
=
to_etype_name
(
rating
)
rating
=
to_etype_name
(
rating
)
rev_rating
=
'rev-%s'
%
rating
rev_rating
=
"rev-%s"
%
rating
mod_args
[
rating
]
=
(
self
.
W_r
[
rating
]
if
self
.
W_r
is
not
None
else
None
,)
mod_args
[
rating
]
=
(
mod_args
[
rev_rating
]
=
(
self
.
W_r
[
rev_rating
]
if
self
.
W_r
is
not
None
else
None
,)
self
.
W_r
[
rating
]
if
self
.
W_r
is
not
None
else
None
,
)
mod_args
[
rev_rating
]
=
(
self
.
W_r
[
rev_rating
]
if
self
.
W_r
is
not
None
else
None
,
)
out_feats
=
self
.
conv
(
graph
,
in_feats
,
mod_args
=
mod_args
)
out_feats
=
self
.
conv
(
graph
,
in_feats
,
mod_args
=
mod_args
)
ufeat
=
out_feats
[
'
user
'
]
ufeat
=
out_feats
[
"
user
"
]
ifeat
=
out_feats
[
'
movie
'
]
ifeat
=
out_feats
[
"
movie
"
]
ufeat
=
ufeat
.
view
(
ufeat
.
shape
[
0
],
-
1
)
ufeat
=
ufeat
.
view
(
ufeat
.
shape
[
0
],
-
1
)
ifeat
=
ifeat
.
view
(
ifeat
.
shape
[
0
],
-
1
)
ifeat
=
ifeat
.
view
(
ifeat
.
shape
[
0
],
-
1
)
...
@@ -270,6 +290,7 @@ class GCMCLayer(nn.Module):
...
@@ -270,6 +290,7 @@ class GCMCLayer(nn.Module):
ifeat
=
self
.
ifc
(
ifeat
)
ifeat
=
self
.
ifc
(
ifeat
)
return
self
.
out_act
(
ufeat
),
self
.
out_act
(
ifeat
)
return
self
.
out_act
(
ufeat
),
self
.
out_act
(
ifeat
)
class
BiDecoder
(
nn
.
Module
):
class
BiDecoder
(
nn
.
Module
):
r
"""Bi-linear decoder.
r
"""Bi-linear decoder.
...
@@ -296,17 +317,14 @@ class BiDecoder(nn.Module):
...
@@ -296,17 +317,14 @@ class BiDecoder(nn.Module):
dropout_rate : float, optional
dropout_rate : float, optional
Dropout raite (Default: 0.0)
Dropout raite (Default: 0.0)
"""
"""
    def __init__(self, in_units, num_classes, num_basis=2, dropout_rate=0.0):
        super(BiDecoder, self).__init__()
        self._num_basis = num_basis
        self.dropout = nn.Dropout(dropout_rate)
        self.Ps = nn.ParameterList(
            nn.Parameter(th.randn(in_units, in_units))
            for _ in range(num_basis)
        )
        self.combine_basis = nn.Linear(self._num_basis, num_classes, bias=False)
        self.reset_parameters()
...
@@ -335,16 +353,17 @@ class BiDecoder(nn.Module):
        with graph.local_scope():
            ufeat = self.dropout(ufeat)
            ifeat = self.dropout(ifeat)
            graph.nodes["movie"].data["h"] = ifeat
            basis_out = []
            for i in range(self._num_basis):
                graph.nodes["user"].data["h"] = ufeat @ self.Ps[i]
                graph.apply_edges(fn.u_dot_v("h", "h", "sr"))
                basis_out.append(graph.edata["sr"])
            out = th.cat(basis_out, dim=1)
            out = self.combine_basis(out)
        return out


class DenseBiDecoder(nn.Module):
    r"""Dense bi-linear decoder.
...
@@ -363,11 +382,8 @@ class DenseBiDecoder(nn.Module):
    dropout_rate : float, optional
        Dropout rate (Default: 0.0)
    """
    def __init__(self, in_units, num_classes, num_basis=2, dropout_rate=0.0):
        super().__init__()
        self._num_basis = num_basis
        self.dropout = nn.Dropout(dropout_rate)
...
@@ -399,10 +415,11 @@ class DenseBiDecoder(nn.Module):
        """
        ufeat = self.dropout(ufeat)
        ifeat = self.dropout(ifeat)
        out = th.einsum("ai,bij,aj->ab", ufeat, self.P, ifeat)
        out = self.combine_basis(out)
        return out


def dot_or_identity(A, B, device=None):
    # if A is None, treat as identity matrix
    if A is None:
...
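The helper is truncated by the diff view here. Per its comment, a None feature matrix stands in for the identity, so one-hot node IDs reduce to row lookups in B. A hedged reconstruction of that behavior (a sketch under those assumptions, not necessarily the exact upstream body):

import torch as th

def dot_or_identity_sketch(A, B, device=None):
    # A is None: conceptually the identity matrix, so A @ B == B.
    if A is None:
        return B
    # A is a vector of node IDs (one-hot rows): A @ B is a row lookup.
    if A.dim() == 1:
        out = B[A]
        return out if device is None else out.to(device)
    return A @ B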
examples/pytorch/gcmc/train.py  View file @ 704bcaf6
...
@@ -14,8 +14,13 @@ import torch as th
import torch.nn as nn
from data import MovieLens
from model import BiDecoder, GCMCLayer
from utils import (
    get_activation,
    get_optimizer,
    MetricLogger,
    torch_net_info,
    torch_total_param_num,
)


class Net(nn.Module):
...
examples/pytorch/gcmc/train_sampling.py  View file @ 704bcaf6
...
@@ -4,38 +4,49 @@ The script loads the full graph in CPU and samples subgraphs for computing
gradients on the training device. The script also supports multi-GPU for
further acceleration.
"""
import argparse
import logging
import os, time
import random
import string
import traceback

import dgl
import numpy as np
import torch as th
import torch.multiprocessing as mp
import torch.nn as nn
import tqdm
from data import MovieLens
from model import BiDecoder, DenseBiDecoder, GCMCLayer
from torch.nn.parallel import DistributedDataParallel
from torch.utils.data import DataLoader
from utils import (
    get_activation,
    get_optimizer,
    MetricLogger,
    to_etype_name,
    torch_net_info,
    torch_total_param_num,
)


class Net(nn.Module):
    def __init__(self, args, dev_id):
        super(Net, self).__init__()
        self._act = get_activation(args.model_activation)
        self.encoder = GCMCLayer(
            args.rating_vals,
            args.src_in_units,
            args.dst_in_units,
            args.gcn_agg_units,
            args.gcn_out_units,
            args.gcn_dropout,
            args.gcn_agg_accum,
            agg_act=self._act,
            share_user_item_param=args.share_param,
            device=dev_id,
        )
        if args.mix_cpu_gpu and args.use_one_hot_fea:
            # if use_one_hot_fea, the user and movie features are None;
            # W can be extremely large, so with mix_cpu_gpu, W should be stored on the CPU
...
@@ -43,45 +54,63 @@ class Net(nn.Module):
        else:
            self.encoder.to(dev_id)
        self.decoder = BiDecoder(
            in_units=args.gcn_out_units,
            num_classes=len(args.rating_vals),
            num_basis=args.gen_r_num_basis_func,
        )
        self.decoder.to(dev_id)

    def forward(
        self, compact_g, frontier, ufeat, ifeat, possible_rating_values
    ):
        user_out, movie_out = self.encoder(frontier, ufeat, ifeat)
        pred_ratings = self.decoder(compact_g, user_out, movie_out)
        return pred_ratings


def load_subtensor(input_nodes, pair_graph, blocks, dataset, parent_graph):
    output_nodes = pair_graph.ndata[dgl.NID]
    head_feat = (
        input_nodes["user"]
        if dataset.user_feature is None
        else dataset.user_feature[input_nodes["user"]]
    )
    tail_feat = (
        input_nodes["movie"]
        if dataset.movie_feature is None
        else dataset.movie_feature[input_nodes["movie"]]
    )

    for block in blocks:
        block.dstnodes["user"].data["ci"] = parent_graph.nodes["user"].data[
            "ci"
        ][block.dstnodes["user"].data[dgl.NID]]
        block.srcnodes["user"].data["cj"] = parent_graph.nodes["user"].data[
            "cj"
        ][block.srcnodes["user"].data[dgl.NID]]
        block.dstnodes["movie"].data["ci"] = parent_graph.nodes["movie"].data[
            "ci"
        ][block.dstnodes["movie"].data[dgl.NID]]
        block.srcnodes["movie"].data["cj"] = parent_graph.nodes["movie"].data[
            "cj"
        ][block.srcnodes["movie"].data[dgl.NID]]

    return head_feat, tail_feat, blocks


def flatten_etypes(pair_graph, dataset, segment):
    n_users = pair_graph.number_of_nodes("user")
    n_movies = pair_graph.number_of_nodes("movie")
    src = []
    dst = []
    labels = []
    ratings = []
    for rating in dataset.possible_rating_values:
        src_etype, dst_etype = pair_graph.edges(
            order="eid", etype=to_etype_name(rating)
        )
        src.append(src_etype)
        dst.append(dst_etype)
        label = np.searchsorted(dataset.possible_rating_values, rating)
...
@@ -92,85 +121,117 @@ def flatten_etypes(pair_graph, dataset, segment):
    ratings = th.cat(ratings)
    labels = th.cat(labels)
    flattened_pair_graph = dgl.heterograph(
        {("user", "rate", "movie"): (src, dst)},
        num_nodes_dict={"user": n_users, "movie": n_movies},
    )
    flattened_pair_graph.edata["rating"] = ratings
    flattened_pair_graph.edata["label"] = labels

    return flattened_pair_graph


def evaluate(args, dev_id, net, dataset, dataloader, segment="valid"):
    possible_rating_values = dataset.possible_rating_values
    nd_possible_rating_values = th.FloatTensor(possible_rating_values).to(dev_id)

    real_pred_ratings = []
    true_rel_ratings = []
    for input_nodes, pair_graph, blocks in dataloader:
        head_feat, tail_feat, blocks = load_subtensor(
            input_nodes,
            pair_graph,
            blocks,
            dataset,
            dataset.valid_enc_graph
            if segment == "valid"
            else dataset.test_enc_graph,
        )
        frontier = blocks[0]
        true_relation_ratings = (
            dataset.valid_truths[pair_graph.edata[dgl.EID]]
            if segment == "valid"
            else dataset.test_truths[pair_graph.edata[dgl.EID]]
        )

        frontier = frontier.to(dev_id)
        head_feat = head_feat.to(dev_id)
        tail_feat = tail_feat.to(dev_id)
        pair_graph = pair_graph.to(dev_id)
        with th.no_grad():
            pred_ratings = net(
                pair_graph,
                frontier,
                head_feat,
                tail_feat,
                possible_rating_values,
            )
        batch_pred_ratings = (
            th.softmax(pred_ratings, dim=1)
            * nd_possible_rating_values.view(1, -1)
        ).sum(dim=1)
        real_pred_ratings.append(batch_pred_ratings)
        true_rel_ratings.append(true_relation_ratings)
    real_pred_ratings = th.cat(real_pred_ratings, dim=0)
    true_rel_ratings = th.cat(true_rel_ratings, dim=0).to(dev_id)
    rmse = ((real_pred_ratings - true_rel_ratings) ** 2.0).mean().item()
    rmse = np.sqrt(rmse)
    return rmse
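The net outputs one logit per rating class; evaluate converts these to a scalar prediction by averaging the possible rating values under the softmax distribution, then reports RMSE against the ground truth. A worked example of that conversion for a single edge:

import torch as th

possible = th.tensor([1.0, 2.0, 3.0, 4.0, 5.0])
logits = th.tensor([[0.1, 0.2, 2.0, 0.3, 0.1]])  # one edge, 5 classes
probs = th.softmax(logits, dim=1)
# Expected rating = sum_k p_k * rating_k; close to 3.0 here because the
# logit for class "3" dominates the softmax.
expected = (probs * possible.view(1, -1)).sum(dim=1)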


def config():
    parser = argparse.ArgumentParser(description="GCMC")
    parser.add_argument("--seed", default=123, type=int)
    parser.add_argument("--gpu", type=str, default="0")
    parser.add_argument("--save_dir", type=str, help="The saving directory")
    parser.add_argument("--save_id", type=int, help="The saving log id")
    parser.add_argument("--silent", action="store_true")
    parser.add_argument(
        "--data_name",
        default="ml-1m",
        type=str,
        help="The dataset name: ml-100k, ml-1m, ml-10m",
    )
    parser.add_argument(
        "--data_test_ratio", type=float, default=0.1
    )  ## for ml-100k the test ratio is 0.2
    parser.add_argument("--data_valid_ratio", type=float, default=0.1)
    parser.add_argument("--use_one_hot_fea", action="store_true", default=False)
    parser.add_argument("--model_activation", type=str, default="leaky")
    parser.add_argument("--gcn_dropout", type=float, default=0.7)
    parser.add_argument("--gcn_agg_norm_symm", type=bool, default=True)
    parser.add_argument("--gcn_agg_units", type=int, default=500)
    parser.add_argument("--gcn_agg_accum", type=str, default="sum")
    parser.add_argument("--gcn_out_units", type=int, default=75)
    parser.add_argument("--gen_r_num_basis_func", type=int, default=2)
    parser.add_argument("--train_max_epoch", type=int, default=1000)
    parser.add_argument("--train_log_interval", type=int, default=1)
    parser.add_argument("--train_valid_interval", type=int, default=1)
    parser.add_argument("--train_optimizer", type=str, default="adam")
    parser.add_argument("--train_grad_clip", type=float, default=1.0)
    parser.add_argument("--train_lr", type=float, default=0.01)
    parser.add_argument("--train_min_lr", type=float, default=0.0001)
    parser.add_argument("--train_lr_decay_factor", type=float, default=0.5)
    parser.add_argument("--train_decay_patience", type=int, default=25)
    parser.add_argument("--train_early_stopping_patience", type=int, default=50)
    parser.add_argument("--share_param", default=False, action="store_true")
    parser.add_argument("--mix_cpu_gpu", default=False, action="store_true")
    parser.add_argument("--minibatch_size", type=int, default=20000)
    parser.add_argument("--num_workers_per_gpu", type=int, default=8)

    args = parser.parse_args()

    ### configure save_dir to save all the info
    if args.save_dir is None:
        args.save_dir = (
            args.data_name
            + "_"
            + "".join(
                random.choices(string.ascii_uppercase + string.digits, k=2)
            )
        )
    if args.save_id is None:
        args.save_id = np.random.randint(20)
    args.save_dir = os.path.join("log", args.save_dir)
...
@@ -179,16 +240,20 @@ def config():
    return args
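With these flags, a typical single-GPU run might be invoked as follows (the flag values are illustrative, not prescribed by the example):

python3 train_sampling.py --data_name ml-100k --data_test_ratio 0.2 --use_one_hot_fea --mix_cpu_gpu --gpu 0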


def run(proc_id, n_gpus, args, devices, dataset):
    dev_id = devices[proc_id]
    if n_gpus > 1:
        dist_init_method = "tcp://{master_ip}:{master_port}".format(
            master_ip="127.0.0.1", master_port="12345"
        )
        world_size = n_gpus
        th.distributed.init_process_group(
            backend="nccl",
            init_method=dist_init_method,
            world_size=world_size,
            rank=dev_id,
        )
    if n_gpus > 0:
        th.cuda.set_device(dev_id)
...
@@ -196,21 +261,29 @@ def run(proc_id, n_gpus, args, devices, dataset):
    train_truths = dataset.train_truths
    num_edges = train_truths.shape[0]

    reverse_types = {
        to_etype_name(k): "rev-" + to_etype_name(k)
        for k in dataset.possible_rating_values
    }
    reverse_types.update({v: k for k, v in reverse_types.items()})
    sampler = dgl.dataloading.MultiLayerNeighborSampler(
        [None], return_eids=True
    )
    sampler = dgl.dataloading.as_edge_prediction_sampler(sampler)
    dataloader = dgl.dataloading.DataLoader(
        dataset.train_enc_graph,
        {
            to_etype_name(k): th.arange(
                dataset.train_enc_graph.number_of_edges(
                    etype=to_etype_name(k)
                )
            )
            for k in dataset.possible_rating_values
        },
        sampler,
        use_ddp=n_gpus > 1,
        batch_size=args.minibatch_size,
        shuffle=True,
        drop_last=False,
    )
    if proc_id == 0:
        valid_dataloader = dgl.dataloading.DataLoader(
...
@@ -220,7 +293,8 @@ def run(proc_id, n_gpus, args, devices, dataset):
=
dataset
.
valid_enc_graph
,
g_sampling
=
dataset
.
valid_enc_graph
,
batch_size
=
args
.
minibatch_size
,
batch_size
=
args
.
minibatch_size
,
shuffle
=
False
,
shuffle
=
False
,
drop_last
=
False
)
drop_last
=
False
,
)
test_dataloader
=
dgl
.
dataloading
.
DataLoader
(
test_dataloader
=
dgl
.
dataloading
.
DataLoader
(
dataset
.
test_dec_graph
,
dataset
.
test_dec_graph
,
th
.
arange
(
dataset
.
test_dec_graph
.
number_of_edges
()),
th
.
arange
(
dataset
.
test_dec_graph
.
number_of_edges
()),
...
@@ -228,19 +302,23 @@ def run(proc_id, n_gpus, args, devices, dataset):
...
@@ -228,19 +302,23 @@ def run(proc_id, n_gpus, args, devices, dataset):
            g_sampling=dataset.test_enc_graph,
            batch_size=args.minibatch_size,
            shuffle=False,
            drop_last=False,
        )

    nd_possible_rating_values = th.FloatTensor(dataset.possible_rating_values)
    nd_possible_rating_values = nd_possible_rating_values.to(dev_id)

    net = Net(args=args, dev_id=dev_id)
    net = net.to(dev_id)
    if n_gpus > 1:
        net = DistributedDataParallel(net, device_ids=[dev_id], output_device=dev_id)
    rating_loss_net = nn.CrossEntropyLoss()
    learning_rate = args.train_lr
    optimizer = get_optimizer(args.train_optimizer)(net.parameters(), lr=learning_rate)
    print("Loading network finished ...\n")

    ### declare the loss information
...
@@ -263,18 +341,33 @@ def run(proc_id, n_gpus, args, devices, dataset):
        with tqdm.tqdm(dataloader) as tq:
            for step, (input_nodes, pair_graph, blocks) in enumerate(tq):
                head_feat, tail_feat, blocks = load_subtensor(
                    input_nodes,
                    pair_graph,
                    blocks,
                    dataset,
                    dataset.train_enc_graph,
                )
                frontier = blocks[0]
                compact_g = flatten_etypes(pair_graph, dataset, "train").to(
                    dev_id
                )
                true_relation_labels = compact_g.edata["label"]
                true_relation_ratings = compact_g.edata["rating"]

                head_feat = head_feat.to(dev_id)
                tail_feat = tail_feat.to(dev_id)
                frontier = frontier.to(dev_id)

                pred_ratings = net(
                    compact_g,
                    frontier,
                    head_feat,
                    tail_feat,
                    dataset.possible_rating_values,
                )
                loss = rating_loss_net(
                    pred_ratings, true_relation_labels.to(dev_id)
                ).mean()
                count_loss += loss.item()
                optimizer.zero_grad()
                loss.backward()
...
@@ -282,17 +375,27 @@ def run(proc_id, n_gpus, args, devices, dataset):
                optimizer.step()

                if proc_id == 0 and iter_idx == 1:
                    print(
                        "Total #Param of net: %d" % (torch_total_param_num(net))
                    )
                real_pred_ratings = (
                    th.softmax(pred_ratings, dim=1)
                    * nd_possible_rating_values.view(1, -1)
                ).sum(dim=1)
                rmse = (
                    (real_pred_ratings - true_relation_ratings.to(dev_id)) ** 2
                ).sum()
                count_rmse += rmse.item()
                count_num += pred_ratings.shape[0]

                tq.set_postfix(
                    {
                        "loss": "{:.4f}".format(count_loss / iter_idx),
                        "rmse": "{:.4f}".format(count_rmse / count_num),
                    },
                    refresh=False,
                )
                iter_idx += 1
...
@@ -304,39 +407,50 @@ def run(proc_id, n_gpus, args, devices, dataset):
        if n_gpus > 1:
            th.distributed.barrier()
        if proc_id == 0:
            valid_rmse = evaluate(
                args=args,
                dev_id=dev_id,
                net=net,
                dataset=dataset,
                dataloader=valid_dataloader,
                segment="valid",
            )
            logging_str = "Val RMSE={:.4f}".format(valid_rmse)

            if valid_rmse < best_valid_rmse:
                best_valid_rmse = valid_rmse
                no_better_valid = 0
                best_epoch = epoch
                test_rmse = evaluate(
                    args=args,
                    dev_id=dev_id,
                    net=net,
                    dataset=dataset,
                    dataloader=test_dataloader,
                    segment="test",
                )
                best_test_rmse = test_rmse
                logging_str += ", Test RMSE={:.4f}".format(test_rmse)
            else:
                no_better_valid += 1
                if (
                    no_better_valid > args.train_early_stopping_patience
                    and learning_rate <= args.train_min_lr
                ):
                    logging.info("Early stopping threshold reached. Stop training.")
                    break
                if no_better_valid > args.train_decay_patience:
                    new_lr = max(
                        learning_rate * args.train_lr_decay_factor,
                        args.train_min_lr,
                    )
                    if new_lr < learning_rate:
                        logging.info("\tChange the LR to %g" % new_lr)
                        learning_rate = new_lr
                        for p in optimizer.param_groups:
                            p["lr"] = learning_rate
                        no_better_valid = 0
                        print("Change the LR to %g" % new_lr)

        # sync on evaluation
...
@@ -346,24 +460,30 @@ def run(proc_id, n_gpus, args, devices, dataset):
    if proc_id == 0:
        print(logging_str)
    if proc_id == 0:
        print(
            "Best epoch Idx={}, Best Valid RMSE={:.4f}, Best Test RMSE={:.4f}".format(
                best_epoch, best_valid_rmse, best_test_rmse
            )
        )


if __name__ == "__main__":
    args = config()

    devices = list(map(int, args.gpu.split(",")))
    n_gpus = len(devices)

    # For GCMC based on sampling, each node is required to have its own features;
    # otherwise (i.e. when the node ID is the feature), the model cannot scale.
    dataset = MovieLens(
        args.data_name,
        "cpu",
        mix_cpu_gpu=args.mix_cpu_gpu,
        use_one_hot_fea=args.use_one_hot_fea,
        symm=args.gcn_agg_norm_symm,
        test_ratio=args.data_test_ratio,
        valid_ratio=args.data_valid_ratio,
    )
    print("Loading data finished ...\n")

    args.src_in_units = dataset.user_feature_shape[1]
...
@@ -372,7 +492,7 @@ if __name__ == '__main__':
    # cpu
    if devices[0] == -1:
        run(0, 0, args, ["cpu"], dataset)
    # gpu
    elif n_gpus == 1:
        run(0, n_gpus, args, devices, dataset)
...
examples/pytorch/gcn/train.py  View file @ 704bcaf6
import argparse

import dgl
import dgl.nn as dglnn
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl import AddSelfLoop
from dgl.data import CiteseerGraphDataset, CoraGraphDataset, PubmedGraphDataset
...
examples/pytorch/geniepath/model.py  View file @ 704bcaf6
import torch as th
import torch.nn as nn
from dgl.nn import GATConv
from torch.nn import LSTM


class GeniePathConv(nn.Module):
...
examples/pytorch/geniepath/ppi.py  View file @ 704bcaf6
...
@@ -3,11 +3,11 @@ import argparse
import numpy as np
import torch as th
import torch.optim as optim
from dgl.data import PPIDataset
from dgl.dataloading import GraphDataLoader
from model import GeniePath, GeniePathLazy
from sklearn.metrics import f1_score


def evaluate(model, loss_fn, dataloader, device="cpu"):
...
examples/pytorch/geniepath/pubmed.py  View file @ 704bcaf6
...
@@ -2,10 +2,10 @@ import argparse
import torch as th
import torch.optim as optim
from dgl.data import PubmedGraphDataset
from model import GeniePath, GeniePathLazy
from sklearn.metrics import accuracy_score


def main(args):
...
examples/pytorch/ggnn/data_utils.py  View file @ 704bcaf6
...
@@ -5,16 +5,16 @@ Data utils for processing bAbI datasets
import os
import string

import dgl
import torch
from dgl.data.utils import (
    _get_dgl_url,
    download,
    extract_archive,
    get_download_dir,
)
from torch.utils.data import DataLoader


def get_babi_dataloaders(batch_size, train_size=50, task_id=4, q_type=0):
...
examples/pytorch/ggnn/ggnn_gc.py  View file @ 704bcaf6
...
@@ -2,9 +2,9 @@
Gated Graph Neural Network module for graph classification tasks
"""
import torch
from dgl.nn.pytorch import GatedGraphConv, GlobalAttentionPooling
from torch import nn


class GraphClsGGNN(nn.Module):
...
examples/pytorch/ggnn/ggnn_ns.py  View file @ 704bcaf6
"""
Gated Graph Neural Network module for node selection tasks
"""
import dgl
import torch
from dgl.nn.pytorch import GatedGraphConv
from torch import nn


class NodeSelectionGGNN(nn.Module):
...
examples/pytorch/ggnn/ggsnn.py  View file @ 704bcaf6
...
@@ -4,9 +4,9 @@ Gated Graph Sequence Neural Network for sequence outputs
import torch
import torch.nn.functional as F
from dgl.nn.pytorch import GatedGraphConv, GlobalAttentionPooling
from torch import nn


class GGSNN(nn.Module):
...
...
examples/pytorch/gin/train.py
View file @
704bcaf6
...
@@ -5,13 +5,13 @@ import torch
...
@@ -5,13 +5,13 @@ import torch
import
torch.nn
as
nn
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
torch.nn.functional
as
F
import
torch.optim
as
optim
import
torch.optim
as
optim
from
sklearn.model_selection
import
StratifiedKFold
from
torch.utils.data.sampler
import
SubsetRandomSampler
from
dgl.data
import
GINDataset
from
dgl.data
import
GINDataset
from
dgl.dataloading
import
GraphDataLoader
from
dgl.dataloading
import
GraphDataLoader
from
dgl.nn.pytorch.conv
import
GINConv
from
dgl.nn.pytorch.conv
import
GINConv
from
dgl.nn.pytorch.glob
import
SumPooling
from
dgl.nn.pytorch.glob
import
SumPooling
from
sklearn.model_selection
import
StratifiedKFold
from
torch.utils.data.sampler
import
SubsetRandomSampler
class
MLP
(
nn
.
Module
):
class
MLP
(
nn
.
Module
):
...
...
examples/pytorch/gnn_explainer/explain_main.py  View file @ 704bcaf6
import argparse
import os

import dgl
import torch as th
from dgl import load_graphs
from dgl.data import (
    BACommunityDataset,
    BAShapeDataset,
    TreeCycleDataset,
    TreeGridDataset,
)
from dgl.nn import GNNExplainer
from gnnlens import Writer
from models import Model


def main(args):
...
examples/pytorch/gnn_explainer/models.py  View file @ 704bcaf6
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F


class Layer(nn.Module):
    def __init__(self, in_dim, out_dim):
...
@@ -10,18 +11,19 @@ class Layer(nn.Module):
    def forward(self, graph, feat, eweight=None):
        with graph.local_scope():
            graph.ndata["h"] = feat
            if eweight is None:
                graph.update_all(fn.copy_u("h", "m"), fn.mean("m", "h"))
            else:
                graph.edata["ew"] = eweight
                graph.update_all(fn.u_mul_e("h", "ew", "m"), fn.mean("m", "h"))
            h = self.layer(th.cat([graph.ndata["h"], feat], dim=-1))
        return h


class Model(nn.Module):
    def __init__(self, in_dim, out_dim, hid_dim=40):
        super().__init__()
...
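The optional eweight argument is what makes this model explainable: dgl.nn.GNNExplainer learns a soft mask over edges and re-runs the model with that mask supplied as per-edge weights. A minimal sketch of driving the layer with an explicit mask (toy graph; assumes the Layer class above is in scope):

import dgl
import torch as th

g = dgl.graph(([0, 1, 2], [1, 2, 0]))  # a 3-node cycle
feat = th.randn(3, 5)
layer = Layer(5, 8)

# One weight per edge in [0, 1], as an explainer would supply; an edge
# with weight 0 sends a zero message into the mean aggregation.
eweight = th.tensor([1.0, 0.0, 0.5]).view(-1, 1)
out = layer(g, feat, eweight=eweight)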
examples/pytorch/gnn_explainer/train_main.py  View file @ 704bcaf6
import argparse
import os

import torch as th
import torch.nn as nn
from dgl import save_graphs
from dgl.data import (
    BACommunityDataset,
    BAShapeDataset,
    TreeCycleDataset,
    TreeGridDataset,
)
from models import Model


def main(args):
    if args.dataset == "BAShape":
        dataset = BAShapeDataset(seed=0)
    elif args.dataset == "BACommunity":
        dataset = BACommunityDataset(seed=0)
    elif args.dataset == "TreeCycle":
        dataset = TreeCycleDataset(seed=0)
    elif args.dataset == "TreeGrid":
        dataset = TreeGridDataset(seed=0)

    graph = dataset[0]
    labels = graph.ndata["label"]
    n_feats = graph.ndata["feat"]
    num_classes = dataset.num_classes

    model = Model(n_feats.shape[-1], num_classes)
...
@@ -40,16 +46,21 @@ def main(args):
        loss.backward()
        optim.step()
        print(f"In Epoch: {epoch}; Acc: {acc}; Loss: {loss.item()}")

    model_stat_dict = model.state_dict()
    model_path = os.path.join("./", f"model_{args.dataset}.pth")
    th.save(model_stat_dict, model_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Dummy model training")
    parser.add_argument(
        "--dataset",
        type=str,
        default="BAShape",
        choices=["BAShape", "BACommunity", "TreeCycle", "TreeGrid"],
    )
    args = parser.parse_args()
    print(args)
...
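After training, the weights land in model_<dataset>.pth next to the script, which the companion explain_main.py can then load. For instance:

python3 train_main.py --dataset BAShape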
examples/pytorch/grace/aug.py  View file @ 704bcaf6
# Data augmentation on graphs via edge dropping and feature masking
import dgl
import numpy as np
import torch as th


def aug(graph, x, feat_drop_rate, edge_mask_rate):
    n_node = graph.num_nodes()
...
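The function body is elided by the diff view; per the header comment, aug combines edge dropping with feature masking. A hedged sketch of one common way to implement those two primitives (a sketch under those assumptions, not the exact upstream code):

import torch as th

def drop_feature_sketch(x, drop_prob):
    # Zero out a random subset of feature columns across all nodes.
    mask = th.rand(x.size(1)) < drop_prob
    x = x.clone()
    x[:, mask] = 0.0
    return x

def mask_edge_sketch(graph, mask_prob):
    # Return IDs of edges to keep; each edge survives with prob (1 - mask_prob).
    keep = th.rand(graph.num_edges()) >= mask_prob
    return keep.nonzero(as_tuple=True)[0]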