Unverified Commit 49e336d2 authored by Rhett Ying's avatar Rhett Ying Committed by GitHub
Browse files

[GraphBolt] enable to convert DGLGraph to CSCSamplingGraph (#5948)

parent cdf65f4d
......@@ -7,6 +7,10 @@ from typing import Dict, Optional, Tuple
import torch
from ...base import ETYPE
from ...convert import to_homogeneous
from ...heterograph import DGLGraph
class GraphMetadata:
r"""Class for metadata of csc sampling graph."""
......@@ -452,3 +456,27 @@ def save_csc_sampling_graph(graph, filename):
metadata_filename, arcname=os.path.basename(metadata_filename)
)
print(f"CSCSamplingGraph has been saved to {filename}.")
def from_dglgraph(g: DGLGraph) -> CSCSamplingGraph:
"""Convert a DGLGraph to CSCSamplingGraph."""
homo_g, ntype_count, _ = to_homogeneous(g, return_count=True)
# Initialize metadata.
node_type_to_id = {ntype: g.get_ntype_id(ntype) for ntype in g.ntypes}
edge_type_to_id = {
etype: g.get_etype_id(etype) for etype in g.canonical_etypes
}
metadata = GraphMetadata(node_type_to_id, edge_type_to_id)
# Obtain CSC matrix.
indptr, indices, _ = homo_g.adj_tensors("csc")
ntype_count.insert(0, 0)
node_type_offset = torch.cumsum(torch.LongTensor(ntype_count), 0)
type_per_edge = homo_g.edata[ETYPE]
return CSCSamplingGraph(
torch.ops.graphbolt.from_csc(
indptr, indices, node_type_offset, type_per_edge
),
metadata,
)
......@@ -4,10 +4,12 @@ import unittest
import backend as F
import dgl
import dgl.graphbolt as gb
import pytest
import torch
from scipy import sparse as spsp
torch.manual_seed(3407)
......@@ -733,6 +735,70 @@ def test_hetero_graph_on_shared_memory(
assert metadata.edge_type_to_id == graph2.metadata.edge_type_to_id
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Graph on GPU is not supported yet.",
)
def test_from_dglgraph_homogeneous():
dgl_g = dgl.rand_graph(1000, 10 * 1000)
gb_g = gb.from_dglgraph(dgl_g)
assert gb_g.num_nodes == dgl_g.num_nodes()
assert gb_g.num_edges == dgl_g.num_edges()
assert torch.equal(gb_g.node_type_offset, torch.tensor([0, 1000]))
assert torch.all(gb_g.type_per_edge == 0)
assert gb_g.metadata.node_type_to_id == {"_N": 0}
assert gb_g.metadata.edge_type_to_id == {("_N", "_E", "_N"): 0}
@unittest.skipIf(
F._default_context_str == "gpu",
reason="Graph on GPU is not supported yet.",
)
def test_from_dglgraph_heterogeneous():
def create_random_hetero():
num_nodes = {"n1": 1000, "n2": 1010, "n3": 1020}
etypes = [
("n1", "r12", "n2"),
("n2", "r21", "n1"),
("n1", "r13", "n3"),
("n2", "r23", "n3"),
]
edges = {}
for etype in etypes:
src_ntype, _, dst_ntype = etype
arr = spsp.random(
num_nodes[src_ntype],
num_nodes[dst_ntype],
density=0.001,
format="coo",
random_state=100,
)
edges[etype] = (arr.row, arr.col)
return dgl.heterograph(edges, num_nodes)
dgl_g = create_random_hetero()
gb_g = gb.from_dglgraph(dgl_g)
assert gb_g.num_nodes == dgl_g.num_nodes()
assert gb_g.num_edges == dgl_g.num_edges()
assert torch.equal(
gb_g.node_type_offset, torch.tensor([0, 1000, 2010, 3030])
)
assert torch.all(gb_g.type_per_edge[:-1] <= gb_g.type_per_edge[1:])
assert gb_g.metadata.node_type_to_id == {
"n1": 0,
"n2": 1,
"n3": 2,
}
assert gb_g.metadata.edge_type_to_id == {
("n1", "r12", "n2"): 0,
("n1", "r13", "n3"): 1,
("n2", "r21", "n1"): 2,
("n2", "r23", "n3"): 3,
}
if __name__ == "__main__":
test_sample_neighbors()
test_sample_neighbors_replace(True, 12)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment