Unverified commit d381f226, authored by Jiahang Li and committed by GitHub
Browse files

[Dataset] MovieLens (#5567)


Co-authored-by: Mufei Li <mufeili1996@gmail.com>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-17-164.ap-northeast-1.compute.internal>
parent b0309326
......@@ -167,4 +167,4 @@ config.cmake
**.png
# model file
*.pth
*.pth
\ No newline at end of file
......@@ -67,6 +67,7 @@ Datasets for node classification/regression tasks
MinesweeperDataset
TolokersDataset
QuestionsDataset
MovieLensDataset
Edge Prediction Datasets
---------------------------------------
......
......@@ -6,6 +6,7 @@ from __future__ import absolute_import
from . import citation_graph as citegrh
from .actor import ActorDataset
from .movielens import MovieLensDataset
from .adapter import *
from .bitcoinotc import BitcoinOTC, BitcoinOTCDataset
from .citation_graph import (
......
This diff is collapsed.
import unittest
import backend as F
import dgl
from dgl.data.movielens import MovieLensDataset
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_movielens():
    """Check the MovieLens dataset variants for edge counts and feature widths.

    For ``ml-100k`` the test checks the number of rating edges in both
    directions, the width of the ``feat`` node features for users and movies,
    and the edge count after applying ``dgl.AddSelfLoop``. For ``ml-1m`` and
    ``ml-10m`` only the number of rating edges is checked.
    """
    transform = dgl.AddSelfLoop(new_etypes=True)

    # ml-100k: edge counts and node feature widths.
    movielens = MovieLensDataset(name="ml-100k", valid_ratio=0.2, verbose=True)
    g = movielens[0]
    assert g.num_edges("user-movie") == g.num_edges("movie-user") == 100000
    # One check per node type is enough; comparing a value against itself
    # before comparing it to the constant adds nothing.
    assert g.nodes["user"].data["feat"].shape[1] == 23
    assert g.nodes["movie"].data["feat"].shape[1] == 320

    # ml-100k with the self-loop transform: the transformed graph is expected
    # to contain exactly one additional edge per node.
    movielens = MovieLensDataset(
        name="ml-100k", valid_ratio=0.2, transform=transform, verbose=True
    )
    g1 = movielens[0]
    assert g1.num_edges() - g.num_edges() == g.num_nodes()

    # Larger variants: only the rating edge counts are verified.
    for name, expected_edges in (("ml-1m", 1000209), ("ml-10m", 10000054)):
        movielens = MovieLensDataset(
            name=name, valid_ratio=0.2, test_ratio=0.1, verbose=True
        )
        g = movielens[0]
        assert (
            g.num_edges("user-movie")
            == g.num_edges("movie-user")
            == expected_edges
        )
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment