Unverified Commit 02e79a3d authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[GraphBolt] Add utils for sample (#5990)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-16-19.ap-northeast-1.compute.internal>
Co-authored-by: Hongzhi (Steve), Chen <chenhongzhi.nkcs@gmail.com>
parent 6519ec27
......@@ -15,6 +15,7 @@ from .impl import *
from .dataloader import *
from .subgraph_sampler import *
from .sampled_subgraph import *
from .utils import unique_and_compact_node_pairs
def load_graphbolt():
......
"""Utility functions for GraphBolt."""
from .internal import *
from .sample_utils import *
"""Utility functions for sampling."""
from collections import defaultdict
from typing import Dict, Tuple, Union
import torch
def unique_and_compact_node_pairs(
    node_pairs: Union[
        Tuple[torch.Tensor, torch.Tensor],
        Dict[Tuple[str, str, str], Tuple[torch.Tensor, torch.Tensor]],
    ]
):
    """
    Compact node pairs and return unique nodes (per type).

    Parameters
    ----------
    node_pairs : Tuple[torch.Tensor, torch.Tensor] or
        Dict(Tuple[str, str, str], Tuple[torch.Tensor, torch.Tensor])
        Node pairs representing source-destination edges.

        - If `node_pairs` is a tuple: It means the graph is homogeneous.
          Also, it should be in the format ('u', 'v') representing source
          and destination pairs. And IDs inside are homogeneous ids.
        - If `node_pairs` is a dictionary: The keys should be edge type and
          the values should be corresponding node pairs. And IDs inside are
          heterogeneous ids.

    Returns
    -------
    Tuple[unique_nodes, compacted_node_pairs]
        The unique nodes (per type) followed by the compacted node pairs.
        For a dictionary input both elements are dictionaries, keyed by node
        type and by edge type respectively; for a tuple input they are a
        single tensor and a single ``(u, v)`` tuple.
        "Compacted node pairs" indicates that the node IDs in the input node
        pairs are replaced with mapped node IDs, where each type of node is
        mapped to a contiguous space of IDs ranging from 0 to N. A mapped ID
        is the position of the original ID inside the unique nodes of its
        type.

    Examples
    --------
    >>> import dgl.graphbolt as gb
    >>> N1 = torch.LongTensor([1, 2, 2])
    >>> N2 = torch.LongTensor([5, 6, 5])
    >>> node_pairs = {("n1", "e1", "n2"): (N1, N2),
    ...     ("n2", "e2", "n1"): (N2, N1)}
    >>> unique_nodes, compacted_node_pairs = gb.unique_and_compact_node_pairs(
    ...     node_pairs
    ... )
    >>> print(unique_nodes)
    {'n1': tensor([1, 2]), 'n2': tensor([5, 6])}
    >>> print(compacted_node_pairs)
    {('n1', 'e1', 'n2'): (tensor([0, 1, 1]), tensor([0, 1, 0])),
    ('n2', 'e2', 'n1'): (tensor([0, 1, 0]), tensor([0, 1, 1]))}
    """
    # Placeholder types used to route a homogeneous graph through the
    # heterogeneous code path.
    homo_ntype = "_N"
    homo_etype = (homo_ntype, "_E", homo_ntype)

    # Check against the builtin `dict`; the `typing.Dict` alias is meant for
    # annotations, not for runtime type checks.
    is_homogeneous = not isinstance(node_pairs, dict)
    if is_homogeneous:
        node_pairs = {homo_etype: node_pairs}

    # Collect the node tensors of every node type, in edge-type order with the
    # source tensor before the destination tensor.
    nodes_dict = defaultdict(list)
    for (u_type, _, v_type), (u, v) in node_pairs.items():
        nodes_dict[u_type].append(u)
        nodes_dict[v_type].append(v)

    # Deduplicate per node type. The inverse indices are aligned with the
    # concatenated tensor, i.e. with the collection order above.
    unique_nodes_dict = {}
    inverse_indices_dict = {}
    for ntype, nodes in nodes_dict.items():
        unique_nodes, inverse_indices = torch.unique(
            torch.cat(nodes),
            return_inverse=True,
        )
        unique_nodes_dict[ntype] = unique_nodes
        inverse_indices_dict[ntype] = inverse_indices

    # Walk the edge types in the same order as the collection pass and hand
    # out consecutive slices of the inverse indices. `offsets` tracks how much
    # of each node type has been consumed so far; the source slice is taken
    # before the destination slice, which matters when both share a type.
    offsets = defaultdict(int)
    compacted_node_pairs = {}
    for etype, (u, v) in node_pairs.items():
        u_type, _, v_type = etype

        u_begin = offsets[u_type]
        u_end = u_begin + u.numel()
        offsets[u_type] = u_end

        v_begin = offsets[v_type]
        v_end = v_begin + v.numel()
        offsets[v_type] = v_end

        compacted_node_pairs[etype] = (
            inverse_indices_dict[u_type][u_begin:u_end],
            inverse_indices_dict[v_type][v_begin:v_end],
        )

    # Return singletons for a homogeneous graph.
    if is_homogeneous:
        return unique_nodes_dict[homo_ntype], compacted_node_pairs[homo_etype]
    return unique_nodes_dict, compacted_node_pairs
import dgl.graphbolt as gb
import torch
def test_unique_and_compact_node_pairs_hetero():
    """Check per-type unique nodes and compacted pairs on a hetero graph.

    Three node types are spread over three edge types so that every node
    type is collected from two different edge types. The expectations are
    derived from `torch.unique` applied to each full node tensor.
    """
    N1 = torch.randint(0, 50, (30,))
    N2 = torch.randint(0, 50, (20,))
    N3 = torch.randint(0, 50, (10,))

    unique_N1, compacted_N1 = torch.unique(N1, return_inverse=True)
    unique_N2, compacted_N2 = torch.unique(N2, return_inverse=True)
    unique_N3, compacted_N3 = torch.unique(N3, return_inverse=True)
    expected_unique_nodes = {
        "n1": unique_N1,
        "n2": unique_N2,
        "n3": unique_N3,
    }
    expected_compacted_pairs = {
        ("n1", "e1", "n2"): (
            compacted_N1[:20],
            compacted_N2,
        ),
        ("n1", "e2", "n3"): (
            compacted_N1[20:30],
            compacted_N3,
        ),
        ("n2", "e3", "n3"): (
            compacted_N2[10:],
            compacted_N3,
        ),
    }
    node_pairs = {
        ("n1", "e1", "n2"): (
            N1[:20],
            N2,
        ),
        ("n1", "e2", "n3"): (
            N1[20:30],
            N3,
        ),
        ("n2", "e3", "n3"): (
            N2[10:],
            N3,
        ),
    }

    unique_nodes, compacted_node_pairs = gb.unique_and_compact_node_pairs(
        node_pairs
    )

    # Compare key sets first: the loops below only visit returned keys, so
    # without these checks a result with missing entries would pass.
    assert set(compacted_node_pairs.keys()) == set(
        expected_compacted_pairs.keys()
    )
    assert set(unique_nodes.keys()) == set(expected_unique_nodes.keys())

    for etype, pair in compacted_node_pairs.items():
        expected_u, expected_v = expected_compacted_pairs[etype]
        u, v = pair
        assert torch.equal(u, expected_u)
        assert torch.equal(v, expected_v)
    for ntype, nodes in unique_nodes.items():
        expected_nodes = expected_unique_nodes[ntype]
        assert torch.equal(nodes, expected_nodes)
def test_unique_and_compact_node_pairs_homo():
    """Check unique nodes and compacted pairs on a homogeneous graph.

    Twenty random node IDs are cut into a source half and a destination
    half; the expectations come from `torch.unique` on the full tensor.
    """
    node_ids = torch.randint(0, 50, (20,))
    expected_unique, expected_inverse = torch.unique(
        node_ids, return_inverse=True
    )

    # First ten IDs act as sources, the remaining ten as destinations.
    src, dst = node_ids[:10], node_ids[10:]
    expected_src, expected_dst = expected_inverse[:10], expected_inverse[10:]

    unique_nodes, compacted_node_pairs = gb.unique_and_compact_node_pairs(
        (src, dst)
    )
    actual_src, actual_dst = compacted_node_pairs

    assert torch.equal(actual_src, expected_src)
    assert torch.equal(actual_dst, expected_dst)
    assert torch.equal(unique_nodes, expected_unique)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment