Unverified Commit cc079e1c authored by Mingbang Wang's avatar Mingbang Wang Committed by GitHub
Browse files

[GraphBolt] Add implementation of num_nodes (#6395)


Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-39-125.ap-northeast-1.compute.internal>
Co-authored-by: default avatarHongzhi (Steve), Chen <chenhongzhi.nkcs@gmail.com>
parent fdb4737c
......@@ -12,6 +12,7 @@ from ...base import ETYPE
from ...convert import to_homogeneous
from ...heterograph import DGLGraph
from ..base import etype_str_to_tuple, etype_tuple_to_str, ORIGINAL_EDGE_ID
from ..sampling_graph import SamplingGraph
from .sampled_subgraph_impl import SampledSubgraphImpl
......@@ -74,7 +75,7 @@ class GraphMetadata:
self.edge_type_to_id = edge_type_to_id
class CSCSamplingGraph:
class CSCSamplingGraph(SamplingGraph):
r"""Class for CSC sampling graph."""
def __repr__(self):
......@@ -83,6 +84,7 @@ class CSCSamplingGraph:
def __init__(
    self,
    c_csc_graph: torch.ScriptObject,
    metadata: Optional[GraphMetadata],
):
    """Wrap a C++ CSC graph object together with its optional metadata.

    Parameters
    ----------
    c_csc_graph : torch.ScriptObject
        The underlying script object; stored as ``_c_csc_graph``.
    metadata : Optional[GraphMetadata]
        Metadata of the graph, or ``None``; stored as ``_metadata``.
    """
    super().__init__()
    # The two attributes are independent of each other; they only keep
    # references to the arguments.
    self._metadata = metadata
    self._c_csc_graph = c_csc_graph
......@@ -108,6 +110,54 @@ class CSCSamplingGraph:
"""
return self._c_csc_graph.num_edges()
@property
def num_nodes(self) -> Union[int, Dict[str, int]]:
    """The number of nodes in the graph.

    - If the graph is homogeneous, returns an integer.
    - If the graph is heterogeneous, returns a dictionary.

    Returns
    -------
    Union[int, Dict[str, int]]
        The number of nodes. Integer indicates the total nodes number of a
        homogeneous graph; dict indicates nodes number per node types of a
        heterogeneous graph. The per-type counts are plain Python ``int``
        values, not tensors.

    Examples
    --------
    >>> import dgl.graphbolt as gb, torch
    >>> total_num_nodes = 5
    >>> total_num_edges = 12
    >>> ntypes = {"N0": 0, "N1": 1}
    >>> etypes = {"N0:R0:N0": 0, "N0:R1:N1": 1,
    ...     "N1:R2:N0": 2, "N1:R3:N1": 3}
    >>> indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    >>> indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    >>> node_type_offset = torch.LongTensor([0, 2, 5])
    >>> type_per_edge = torch.LongTensor(
    ...     [0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3])
    >>> metadata = gb.GraphMetadata(ntypes, etypes)
    >>> graph = gb.from_csc(indptr, indices, node_type_offset,
    ...     type_per_edge, None, metadata)
    >>> print(graph.num_nodes)
    {'N0': 2, 'N1': 3}
    """
    offset = self.node_type_offset

    # Homogeneous: without type offsets or metadata there is nothing to
    # split by, so report the total from the underlying graph object.
    if offset is None or self.metadata is None:
        return self._c_csc_graph.num_nodes()

    # Heterogeneous: the count of a node type is the difference between two
    # consecutive entries of the offset array. Indexing a tensor yields a
    # 0-dim tensor, so convert to ``int`` to honour the declared
    # ``Dict[str, int]`` return type.
    return {
        ntype: int(offset[type_id + 1] - offset[type_id])
        for ntype, type_id in self.metadata.node_type_to_id.items()
    }
@property
def csc_indptr(self) -> torch.tensor:
"""Returns the indices pointer in the CSC graph.
......@@ -312,8 +362,8 @@ class CSCSamplingGraph:
without replacement. If True, a value can be selected multiple
times. Otherwise, each value can be selected only once.
probs_name: str, optional
An optional string specifying the name of an edge attribute used a. This
attribute tensor should contain (unnormalized) probabilities
An optional string specifying the name of an edge attribute used.
This attribute tensor should contain (unnormalized) probabilities
corresponding to each neighboring edge of a node. It must be a 1D
floating-point or boolean tensor, with the number of elements
equalling the total number of edges.
......
......@@ -23,18 +23,3 @@ class SamplingGraph:
heterogenous graph.
"""
raise NotImplementedError
@property
def num_edges(self) -> Union[int, Dict[str, int]]:
    """Return the edge count of the graph.

    This base implementation is a placeholder and always raises.

    Returns
    -------
    Union[int, Dict[str, int]]
        For a homogeneous graph, a single integer giving the total number
        of edges; for a heterogeneous graph, a dictionary giving the
        number of edges per edge type.

    Raises
    ------
    NotImplementedError
        Always raised by this implementation.
    """
    raise NotImplementedError
......@@ -177,6 +177,84 @@ def test_hetero_graph(total_num_nodes, total_num_edges, num_ntypes, num_etypes):
assert metadata.edge_type_to_id == graph.metadata.edge_type_to_id
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
@pytest.mark.parametrize(
    "total_num_nodes, total_num_edges",
    [(1, 1), (100, 1), (10, 50), (1000, 50000)],
)
def test_num_nodes_homo(total_num_nodes, total_num_edges):
    """Check that ``num_nodes`` of a homogeneous graph is the node total."""
    # Random CSC structure with the requested number of nodes and edges.
    indptr, col_indices = gbt.random_homo_graph(
        total_num_nodes, total_num_edges
    )
    # Two random per-edge attributes, generated in the order "A1", "A2".
    attrs = {
        attr_name: torch.randn(total_num_edges) for attr_name in ("A1", "A2")
    }
    homo_graph = gb.from_csc(indptr, col_indices, edge_attributes=attrs)

    # No node types were given, so the property must be the plain total.
    assert homo_graph.num_nodes == total_num_nodes
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Graph is CPU only at present.",
)
def test_num_nodes_hetero():
    """Check the per-type node counts of a small heterogeneous graph.

    Original graph in COO:
    1   0   1   0   1
    1   0   1   1   0
    0   1   0   1   0
    0   1   0   0   1
    1   0   0   0   1

    node_type_0: [0, 1]
    node_type_1: [2, 3, 4]
    edge_type_0: node_type_0 -> node_type_0
    edge_type_1: node_type_0 -> node_type_1
    edge_type_2: node_type_1 -> node_type_0
    edge_type_3: node_type_1 -> node_type_1
    """
    # Fixture data.
    total_num_nodes, total_num_edges = 5, 12
    ntypes = {"N0": 0, "N1": 1}
    etypes = {"N0:R0:N0": 0, "N0:R1:N1": 1, "N1:R2:N0": 2, "N1:R3:N1": 3}
    indptr = torch.LongTensor([0, 3, 5, 7, 9, 12])
    indices = torch.LongTensor([0, 1, 4, 2, 3, 0, 1, 1, 2, 0, 3, 4])
    node_type_offset = torch.LongTensor([0, 2, 5])
    type_per_edge = torch.LongTensor([0, 0, 2, 2, 2, 1, 1, 1, 3, 1, 3, 3])

    # Sanity-check the fixture before building the graph.
    assert indptr[-1] == total_num_edges
    assert indptr[-1] == len(indices)
    assert node_type_offset[-1] == total_num_nodes
    assert all(type_per_edge < len(etypes))

    # Build the CSCSamplingGraph.
    graph = gb.from_csc(
        indptr,
        indices,
        node_type_offset,
        type_per_edge,
        None,
        gb.GraphMetadata(ntypes, etypes),
    )

    # The property must report exactly the node count of each node type.
    expected = {"N0": 2, "N1": 3}
    assert graph.num_nodes == expected
    for ntype, count in expected.items():
        assert graph.num_nodes[ntype] == count
    assert "N2" not in graph.num_nodes
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Graph is CPU only at present.",
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment