Unverified Commit 405de769 authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[Graphbolt]Add compact for node list (#6176)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-16-19.ap-northeast-1.compute.internal>
parent 24c875c6
......@@ -20,7 +20,7 @@ from .negative_sampler import *
from .data_block import *
from .node_classification_block import *
from .link_prediction_block import *
from .utils import unique_and_compact_node_pairs
from .utils import unique_and_compact, unique_and_compact_node_pairs
def load_graphbolt():
......
"""Utility functions for sampling."""
from collections import defaultdict
from typing import Dict, Tuple, Union
from typing import Dict, List, Tuple, Union
import torch
def unique_and_compact(
    nodes: Union[
        List[torch.Tensor],
        Dict[str, List[torch.Tensor]],
    ],
) -> Union[
    Tuple[torch.Tensor, List[torch.Tensor]],
    Tuple[Dict[str, torch.Tensor], Dict[str, List[torch.Tensor]]],
]:
    """
    Deduplicate node IDs and rewrite node tensors with compacted IDs.

    Parameters
    ----------
    nodes : List[torch.Tensor] or Dict[str, List[torch.Tensor]]
        Node ID tensors to compact.
        - If `nodes` is a list of tensors: all tensors are uniqued and
        compacted together; usually used for homogeneous graphs.
        - If `nodes` is a dictionary: the keys should be node types and the
        values the corresponding lists of node tensors; unique and compact
        is done independently per type, usually for heterogeneous graphs.

    Returns
    -------
    Tuple[unique_nodes, compacted_node_list]
        The unique nodes of all nodes in the input, and the compacted node
        list in which IDs are replaced with compacted node IDs. For
        dictionary input both elements are dictionaries keyed by node type.
        "Compacted node list" means each input tensor is returned with the
        same length and order, holding indices into `unique_nodes` instead
        of the original IDs, so `unique_nodes[compacted]` recovers the
        original tensor.

    Notes
    -----
    Each list of tensors is concatenated with `torch.cat`, so it is expected
    to contain at least one tensor. An empty dictionary yields two empty
    dictionaries.
    """

    def unique_and_compact_per_type(nodes_of_type):
        # Remember every tensor's length so the flat compacted result can be
        # cut back into pieces that line up with the inputs.
        nums = [node.size(0) for node in nodes_of_type]
        flat_nodes = torch.cat(nodes_of_type)
        # Only the first argument carries data here; the other two inputs
        # are passed empty and the third output is discarded.
        empty_tensor = flat_nodes.new_empty(0)
        unique, compacted, _ = torch.ops.graphbolt.unique_and_compact(
            flat_nodes, empty_tensor, empty_tensor
        )
        # `split` returns a tuple; convert so the result is the list that
        # the documented contract promises.
        return unique, list(compacted.split(nums))

    if not isinstance(nodes, dict):
        return unique_and_compact_per_type(nodes)

    unique, compacted = {}, {}
    for ntype, nodes_of_type in nodes.items():
        unique[ntype], compacted[ntype] = unique_and_compact_per_type(
            nodes_of_type
        )
    return unique, compacted
def unique_and_compact_node_pairs(
node_pairs: Union[
Tuple[torch.Tensor, torch.Tensor],
......
......@@ -3,6 +3,50 @@ import pytest
import torch
def test_unique_and_compact_hetero():
    """Check per-type unique/compact on a dictionary of node tensor lists."""
    N1 = torch.randint(0, 50, (30,))
    N2 = torch.randint(0, 50, (20,))
    N3 = torch.randint(0, 50, (10,))
    # torch.unique returns sorted values by default; the results below are
    # sorted before comparison so their order does not matter.
    expected_unique = {
        "n1": torch.unique(N1),
        "n2": torch.unique(N2),
        "n3": torch.unique(N3),
    }
    # Each type is fed as several equally sized chunks of its ID tensor.
    nodes_dict = {
        "n1": N1.split(5),
        "n2": N2.split(4),
        "n3": N3.split(2),
    }

    unique, compacted = gb.unique_and_compact(nodes_dict)

    # Iterate over the *expected* keys so that a missing or empty result
    # fails instead of passing vacuously.
    assert unique.keys() == expected_unique.keys()
    assert compacted.keys() == nodes_dict.keys()

    for ntype, expected_nodes in expected_unique.items():
        assert torch.equal(torch.sort(unique[ntype])[0], expected_nodes)

    for ntype, expected_chunks in nodes_dict.items():
        compacted_chunks = compacted[ntype]
        # zip() stops at the shorter input, so pin the chunk count first.
        assert len(compacted_chunks) == len(expected_chunks)
        for expected_node, node in zip(expected_chunks, compacted_chunks):
            # Mapping compacted IDs back through `unique` must reproduce
            # the original IDs.
            assert torch.equal(expected_node, unique[ntype][node])
def test_unique_and_compact_homo():
    """Check unique/compact on a plain list of node tensors."""
    N = torch.randint(0, 50, (200,))
    # torch.unique returns sorted values by default; the result below is
    # sorted before comparison so its order does not matter.
    expected_unique_N = torch.unique(N)
    nodes_list = N.split(5)

    unique, compacted = gb.unique_and_compact(nodes_list)

    assert torch.equal(torch.sort(unique)[0], expected_unique_N)
    # zip() stops at the shorter input, so pin the chunk count first;
    # otherwise missing chunks would go unnoticed.
    assert len(compacted) == len(nodes_list)
    for expected_node, node in zip(nodes_list, compacted):
        # Mapping compacted IDs back through `unique` must reproduce the
        # original IDs.
        assert torch.equal(expected_node, unique[node])
def test_unique_and_compact_node_pairs_hetero():
N1 = torch.randint(0, 50, (30,))
N2 = torch.randint(0, 50, (20,))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment