test_sort.py 4.37 KB
Newer Older
1
import itertools
2
3
4
import unittest
from collections import Counter

5
6
import backend as F
import networkx as nx
7
8
9
import numpy as np
import pytest
import scipy.sparse as ssp
nv-dlasalle's avatar
nv-dlasalle committed
10
from test_utils import parametrize_idtype
11

12
13
14
15
16
import dgl
import dgl.function as fn
from dgl import DGLError


17
18
def create_test_heterograph(num_nodes, num_adj, idtype):
    if isinstance(num_adj, int):
19
20
21
22
        num_adj = [num_adj, num_adj + 1]
    num_adj_list = list(
        np.random.choice(np.arange(num_adj[0], num_adj[1]), num_nodes)
    )
23
    src = np.concatenate([[i] * num_adj_list[i] for i in range(num_nodes)])
24
25
26
27
    dst = [
        np.random.choice(num_nodes, nadj, replace=False)
        for nadj in num_adj_list
    ]
28
29
30
    dst = np.concatenate(dst)
    return dgl.graph((src, dst), idtype=idtype)

31

32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def check_sort(spm, tag_arr=None, tag_pos=None):
    if tag_arr is None:
        tag_arr = np.arange(spm.shape[0])
    else:
        tag_arr = F.asnumpy(tag_arr)
    if tag_pos is not None:
        tag_pos = F.asnumpy(tag_pos)
    for i in range(spm.shape[0]):
        row = spm.getrow(i)
        dst = row.nonzero()[1]
        if tag_pos is not None:
            tag_pos_row = tag_pos[i]
            tag_pos_ptr = tag_arr[dst[0]] if len(dst) > 0 else 0
        for j in range(len(dst) - 1):
            if tag_pos is not None and tag_arr[dst[j]] != tag_pos_ptr:
                # `tag_pos_ptr` is the expected tag value. Here we check whether the
                # tag value is equal to `tag_pos_ptr`
                return False
50
            if tag_arr[dst[j]] > tag_arr[dst[j + 1]]:
51
52
                # The tag should be in descending order after sorting
                return False
53
54
            if tag_pos is not None and tag_arr[dst[j]] < tag_arr[dst[j + 1]]:
                if j + 1 != int(tag_pos_row[tag_pos_ptr + 1]):
55
56
                    # The boundary of tag should be consistent with `tag_pos`
                    return False
57
                tag_pos_ptr = tag_arr[dst[j + 1]]
58
59
60
    return True


61
62
63
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
nv-dlasalle's avatar
nv-dlasalle committed
64
@parametrize_idtype
65
66
67
68
def test_sort_with_tag(idtype):
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    g = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
    tag = F.tensor(np.random.choice(num_tags, g.number_of_nodes()))
69
70
71
    src, dst = g.edges()
    edge_tag_dst = F.gather_row(tag, F.tensor(dst))
    edge_tag_src = F.gather_row(tag, F.tensor(src))
72

73
    for tag_type in ["node", "edge"]:
74
        new_g = dgl.sort_csr_by_tag(
75
76
77
78
79
80
81
82
            g, tag if tag_type == "node" else edge_tag_dst, tag_type=tag_type
        )
        old_csr = g.adjacency_matrix(scipy_fmt="csr")
        new_csr = new_g.adjacency_matrix(scipy_fmt="csr")
        assert check_sort(new_csr, tag, new_g.dstdata["_TAG_OFFSET"])
        assert not check_sort(
            old_csr, tag
        )  # Check the original csr is not modified.
83

84
    for tag_type in ["node", "edge"]:
85
        new_g = dgl.sort_csc_by_tag(
86
87
88
89
90
91
            g, tag if tag_type == "node" else edge_tag_src, tag_type=tag_type
        )
        old_csc = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
        new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt="csr")
        assert check_sort(new_csc, tag, new_g.srcdata["_TAG_OFFSET"])
        assert not check_sort(old_csc, tag)
92

93
94
95
96

@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU sorting by tag not implemented"
)
nv-dlasalle's avatar
nv-dlasalle committed
97
@parametrize_idtype
98
99
100
def test_sort_with_tag_bipartite(idtype):
    num_nodes, num_adj, num_tags = 200, [20, 50], 5
    g = create_test_heterograph(num_nodes, num_adj, idtype=idtype)
101
102
103
    g = dgl.heterograph({("_U", "_E", "_V"): g.edges()})
    utag = F.tensor(np.random.choice(num_tags, g.number_of_nodes("_U")))
    vtag = F.tensor(np.random.choice(num_tags, g.number_of_nodes("_V")))
104

105
    new_g = dgl.sort_csr_by_tag(g, vtag)
106
107
108
109
    old_csr = g.adjacency_matrix(scipy_fmt="csr")
    new_csr = new_g.adjacency_matrix(scipy_fmt="csr")
    assert check_sort(new_csr, vtag, new_g.nodes["_U"].data["_TAG_OFFSET"])
    assert not check_sort(old_csr, vtag)
110

111
    new_g = dgl.sort_csc_by_tag(g, utag)
112
113
114
115
116
    old_csc = g.adjacency_matrix(transpose=True, scipy_fmt="csr")
    new_csc = new_g.adjacency_matrix(transpose=True, scipy_fmt="csr")
    assert check_sort(new_csc, utag, new_g.nodes["_V"].data["_TAG_OFFSET"])
    assert not check_sort(old_csc, utag)

117
118
119
120

if __name__ == "__main__":
    test_sort_with_tag(F.int32)
    test_sort_with_tag_bipartite(F.int32)