Unverified commit ec7137dd, authored by Andrei Ivanov and committed by GitHub
Browse files

Improving the PinSAGE example. (#6067)

parent 562a1c87
...@@ -26,9 +26,20 @@ def train_test_split_by_time(df, timestamp, user): ...@@ -26,9 +26,20 @@ def train_test_split_by_time(df, timestamp, user):
df.iloc[-2, -2] = True df.iloc[-2, -2] = True
return df return df
meta_df = {
"user_id": np.int64,
"movie_id": np.int64,
"rating": np.int64,
"timestamp": np.int64,
"user_id": np.int64,
"train_mask": bool,
"val_mask": bool,
"test_mask": bool,
}
df = ( df = (
df.groupby(user, group_keys=False) df.groupby(user, group_keys=False)
.apply(train_test_split) .apply(train_test_split, meta=meta_df)
.compute(scheduler="processes") .compute(scheduler="processes")
.sort_index() .sort_index()
) )
......
...@@ -113,37 +113,22 @@ if __name__ == "__main__": ...@@ -113,37 +113,22 @@ if __name__ == "__main__":
# Assign features. # Assign features.
# Note that variable-sized features such as texts or images are handled elsewhere. # Note that variable-sized features such as texts or images are handled elsewhere.
g.nodes["user"].data["gender"] = torch.LongTensor( for data_type in ["gender", "age", "occupation", "zip"]:
users["gender"].cat.codes.values g.nodes["user"].data[data_type] = torch.LongTensor(
) np.array(users[data_type].cat.codes.values)
g.nodes["user"].data["age"] = torch.LongTensor(
users["age"].cat.codes.values
)
g.nodes["user"].data["occupation"] = torch.LongTensor(
users["occupation"].cat.codes.values
)
g.nodes["user"].data["zip"] = torch.LongTensor(
users["zip"].cat.codes.values
) )
g.nodes["movie"].data["year"] = torch.LongTensor( g.nodes["movie"].data["year"] = torch.LongTensor(
movies["year"].cat.codes.values np.array(movies["year"].cat.codes.values)
) )
g.nodes["movie"].data["genre"] = torch.FloatTensor( g.nodes["movie"].data["genre"] = torch.FloatTensor(
movies[genre_columns].values np.array(movies[genre_columns].values)
) )
g.edges["watched"].data["rating"] = torch.LongTensor( for edge_type in ["watched", "watched-by"]:
ratings["rating"].values for data_type in ["rating", "timestamp"]:
) g.edges[edge_type].data[data_type] = torch.LongTensor(
g.edges["watched"].data["timestamp"] = torch.LongTensor( np.array(ratings[data_type].values)
ratings["timestamp"].values
)
g.edges["watched-by"].data["rating"] = torch.LongTensor(
ratings["rating"].values
)
g.edges["watched-by"].data["timestamp"] = torch.LongTensor(
ratings["timestamp"].values
) )
# Train-validation-test split # Train-validation-test split
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment