Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
ec7137dd
"src/git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "02ba50c6104d40b745163fd14e84214b3db90112"
Unverified
Commit
ec7137dd
authored
Aug 02, 2023
by
Andrei Ivanov
Committed by
GitHub
Aug 03, 2023
Browse files
Improving the PinSAGE example. (#6067)
parent
562a1c87
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
23 additions
and
27 deletions
+23
-27
examples/pytorch/pinsage/data_utils.py
examples/pytorch/pinsage/data_utils.py
+12
-1
examples/pytorch/pinsage/process_movielens1m.py
examples/pytorch/pinsage/process_movielens1m.py
+11
-26
No files found.
examples/pytorch/pinsage/data_utils.py
View file @
ec7137dd
...
...
@@ -26,9 +26,20 @@ def train_test_split_by_time(df, timestamp, user):
df
.
iloc
[
-
2
,
-
2
]
=
True
return
df
meta_df
=
{
"user_id"
:
np
.
int64
,
"movie_id"
:
np
.
int64
,
"rating"
:
np
.
int64
,
"timestamp"
:
np
.
int64
,
"user_id"
:
np
.
int64
,
"train_mask"
:
bool
,
"val_mask"
:
bool
,
"test_mask"
:
bool
,
}
df
=
(
df
.
groupby
(
user
,
group_keys
=
False
)
.
apply
(
train_test_split
)
.
apply
(
train_test_split
,
meta
=
meta_df
)
.
compute
(
scheduler
=
"processes"
)
.
sort_index
()
)
...
...
examples/pytorch/pinsage/process_movielens1m.py
View file @
ec7137dd
...
...
@@ -113,38 +113,23 @@ if __name__ == "__main__":
# Assign features.
# Note that variable-sized features such as texts or images are handled elsewhere.
g
.
nodes
[
"user"
].
data
[
"gender"
]
=
torch
.
LongTensor
(
users
[
"gender"
].
cat
.
codes
.
values
)
g
.
nodes
[
"user"
].
data
[
"age"
]
=
torch
.
LongTensor
(
users
[
"age"
].
cat
.
codes
.
values
)
g
.
nodes
[
"user"
].
data
[
"occupation"
]
=
torch
.
LongTensor
(
users
[
"occupation"
].
cat
.
codes
.
values
)
g
.
nodes
[
"user"
].
data
[
"zip"
]
=
torch
.
LongTensor
(
users
[
"zip"
].
cat
.
codes
.
values
)
for
data_type
in
[
"gender"
,
"age"
,
"occupation"
,
"zip"
]:
g
.
nodes
[
"user"
].
data
[
data_type
]
=
torch
.
LongTensor
(
np
.
array
(
users
[
data_type
].
cat
.
codes
.
values
)
)
g
.
nodes
[
"movie"
].
data
[
"year"
]
=
torch
.
LongTensor
(
movies
[
"year"
].
cat
.
codes
.
values
np
.
array
(
movies
[
"year"
].
cat
.
codes
.
values
)
)
g
.
nodes
[
"movie"
].
data
[
"genre"
]
=
torch
.
FloatTensor
(
movies
[
genre_columns
].
values
np
.
array
(
movies
[
genre_columns
].
values
)
)
g
.
edges
[
"watched"
].
data
[
"rating"
]
=
torch
.
LongTensor
(
ratings
[
"rating"
].
values
)
g
.
edges
[
"watched"
].
data
[
"timestamp"
]
=
torch
.
LongTensor
(
ratings
[
"timestamp"
].
values
)
g
.
edges
[
"watched-by"
].
data
[
"rating"
]
=
torch
.
LongTensor
(
ratings
[
"rating"
].
values
)
g
.
edges
[
"watched-by"
].
data
[
"timestamp"
]
=
torch
.
LongTensor
(
ratings
[
"timestamp"
].
values
)
for
edge_type
in
[
"watched"
,
"watched-by"
]:
for
data_type
in
[
"rating"
,
"timestamp"
]:
g
.
edges
[
edge_type
].
data
[
data_type
]
=
torch
.
LongTensor
(
np
.
array
(
ratings
[
data_type
].
values
)
)
# Train-validation-test split
# This is a little bit tricky as we want to select the last interaction for test, and the
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment