test_movielens.py 1.6 KB
Newer Older
Jiahang Li's avatar
Jiahang Li committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import unittest

import backend as F

import dgl
from dgl.data.movielens import MovieLensDataset


@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_movielens():
    """Sanity-check the graphs produced by ``MovieLensDataset``.

    For ``ml-100k`` the test verifies the number of edges per direction, the
    width of the user and movie ``"feat"`` tensors, and that supplying an
    ``AddSelfLoop`` transform yields exactly one additional edge per node.
    For ``ml-1m`` and ``ml-10m`` only the per-direction edge counts are
    verified.

    The test is skipped when the default context is GPU or when the active
    backend is not PyTorch.
    """
    transform = dgl.AddSelfLoop(new_etypes=True)

    # --- ml-100k without a transform -------------------------------------
    movielens = MovieLensDataset(name="ml-100k", valid_ratio=0.2, verbose=True)
    g = movielens[0]
    # Both edge directions must carry every rating exactly once.
    assert g.num_edges("user-movie") == g.num_edges("movie-user") == 100000
    # Each feature width is checked once; the previous chained comparisons
    # compared the very same expression with itself and added no coverage.
    assert g.nodes["user"].data["feat"].shape[1] == 23
    assert g.nodes["movie"].data["feat"].shape[1] == 320

    # --- ml-100k with the self-loop transform ----------------------------
    movielens = MovieLensDataset(
        name="ml-100k", valid_ratio=0.2, transform=transform, verbose=True
    )
    g1 = movielens[0]
    # The transformed graph is expected to gain one edge per node relative
    # to the untransformed graph `g` loaded above.
    assert g1.num_edges() - g.num_edges() == g.num_nodes()

    # --- ml-1m -----------------------------------------------------------
    movielens = MovieLensDataset(
        name="ml-1m", valid_ratio=0.2, test_ratio=0.1, verbose=True
    )
    g = movielens[0]
    assert g.num_edges("user-movie") == g.num_edges("movie-user") == 1000209

    # --- ml-10m ----------------------------------------------------------
    movielens = MovieLensDataset(
        name="ml-10m", valid_ratio=0.2, test_ratio=0.1, verbose=True
    )
    g = movielens[0]
    assert g.num_edges("user-movie") == g.num_edges("movie-user") == 10000054