Unverified commit be444e52 authored by Mufei Li, committed by GitHub

[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)



* Update graph

* Fix for dgl.graph

* from_scipy

* Replace canonical_etypes with relations

* from_networkx

* Update for hetero_from_relations

* Roll back the change of canonical_etypes to relations

* heterograph

* bipartite

* Update doc

* Fix lint

* Fix lint

* Fix test cases

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Update

* Fix test

* Fix

* Update

* Use DGLError

* Update

* Update

* Update

* Update

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix

* Fix

* Update

* Update

* Update

* Update

* Update

* Update

* rewrite sanity checks

* delete unnecessary checks

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Update

* Fix

* Update

* Update

* Update

* Fix

* Fix

* Fix

* Update

* Fix

* Update

* Fix

* Fix

* Update

* Fix

* Update

* Fix
Co-authored-by: xiang song (charlie.song) <classicxsong@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 0afc3cf8
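The hunks below track DGL's graph-construction refactor: edge lists given as pairs of tuples are replaced by a (src, dst) pair of ID arrays, NetworkX and SciPy inputs move to the dedicated from_networkx/from_scipy converters, and to_homo/to_hetero are renamed to to_homogeneous/to_heterogeneous. As a minimal, doctest-style sketch of the new-style calls (the node and edge values are illustrative, not taken from the patch; assumes DGL 0.5):

>>> import dgl
>>> import networkx as nx
>>> import scipy.sparse as sp
>>> # Homogeneous graph from a (src, dst) tuple instead of a list of pairs
>>> g = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> # Heterogeneous graph from a relation -> (src, dst) dict
>>> hg = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2])})
>>> # Dedicated converters replace the overloaded dgl.graph(...) entry point
>>> g_nx = dgl.from_networkx(nx.path_graph(3))
>>> g_sp = dgl.from_scipy(sp.random(5, 5, density=0.2, format='coo'))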
@@ -66,7 +66,6 @@ def main(args):
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
- num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
@@ -98,7 +97,7 @@ def main(args):
category_id = i
# edge type and normalization factor
- g = dgl.to_homo(hg)
+ g = dgl.to_homogeneous(hg, edata=['norm'])
# check cuda
if args.gpu < 0:
......
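The renamed converter also lets callers carry features across the conversion, which is what the edata=['norm'] argument above does. A small sketch of the round trip (feature names and sizes here are made up; the feature must exist on every edge type to be copied):

>>> import dgl
>>> import torch as th
>>> hg = dgl.heterograph({
...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
...     ('user', 'plays', 'game'): ([0, 1], [0, 1])})
>>> hg.edges['follows'].data['norm'] = th.ones(2, 1)
>>> hg.edges['plays'].data['norm'] = th.ones(2, 1)
>>> g = dgl.to_homogeneous(hg, edata=['norm'])        # copy 'norm' onto the homogeneous graph
>>> hg2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)  # and back, using the stored type ids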
@@ -89,15 +89,15 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
*
- * \note Example: consider the following graph:
+ * \note Example: consider a graph with the following edges
*
- * g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
+ * [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)]
*
* Then ToSimpleGraph(g) would yield the following elements:
*
- * * The first element would be the simple graph itself:
+ * * The first element would be the simple graph itself with the following edges
*
- * simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
+ * [(0, 1), (1, 3), (1, 4), (2, 2)]
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
......
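On the Python side the same deduplication is exposed through dgl.to_simple; the per-edge multiplicities described in the C++ doc are also retrievable, though the exact Python keyword is not shown in this hunk. A quick sketch with the same edges:

>>> import dgl
>>> g = dgl.graph(([0, 1, 2, 1, 1, 1], [1, 3, 2, 3, 4, 4]))
>>> sg = dgl.to_simple(g)          # drops the duplicated (1, 3) and (1, 4) edges
>>> sg.number_of_edges()
4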
@@ -4038,6 +4038,7 @@ class DGLGraph(DGLBaseGraph):
self._node_frame = old_nframe
self._edge_frame = old_eframe
+ @property
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return True
......
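Since is_homogeneous is now a property, call sites drop the parentheses, as the nn-module hunks further down also do. For example:

>>> import dgl
>>> g = dgl.graph(([0, 1], [1, 2]))
>>> g.is_homogeneous          # attribute access, no call
True
>>> hg = dgl.heterograph({('user', 'plays', 'game'): ([0], [0])})
>>> hg.is_homogeneous
False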
@@ -397,7 +397,7 @@ def unbatch(g, node_split=None, edge_split=None):
for i in range(num_split)]
# Create graphs
- gs = [convert.heterograph(edge_dict, num_nodes_dict, validate=True, idtype=g.idtype)
+ gs = [convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)]
# Unbatch node features
......
This diff is collapsed.
@@ -19,7 +19,7 @@ from .. import convert
from .. import batch
from .. import backend as F
from ..convert import graph as dgl_graph
- from ..convert import to_networkx
+ from ..convert import from_networkx, to_networkx
backend = os.environ.get('DGLBACKEND', 'pytorch')
@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset):
test_mask = _sample_mask(idx_test, labels.shape[0])
self._graph = graph
- g = dgl_graph(graph)
+ g = from_networkx(graph)
g.ndata['train_mask'] = generate_mask_tensor(train_mask)
g.ndata['val_mask'] = generate_mask_tensor(val_mask)
@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset):
for line in f.readlines():
if line.startswith('graph'):
if len(elist) != 0:
- self.graphs.append(dgl_graph(elist))
+ self.graphs.append(dgl_graph(tuple(zip(*elist))))
elist = []
else:
u, v = line.strip().split(' ')
elist.append((int(u), int(v)))
if len(elist) != 0:
- self.graphs.append(dgl_graph(elist))
+ self.graphs.append(dgl_graph(tuple(zip(*elist))))
with open("{}/pmpds.pkl".format(root), 'rb') as f:
self.pmpds = _pickle_load(f)
self.labels = []
......
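The tuple(zip(*elist)) wrapper is what adapts the dataset's list of (u, v) pairs to the (src, dst) form that dgl.graph now expects. For example:

>>> elist = [(0, 1), (1, 2), (2, 3)]
>>> tuple(zip(*elist))        # pairs -> (src_tuple, dst_tuple)
((0, 1, 2), (1, 2, 3))
>>> import dgl
>>> g = dgl.graph(tuple(zip(*elist)))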
@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset):
self.labels.append(self.glabel_dict[glabel])
- g = dgl_graph([])
+ g = dgl_graph(([], []))
g.add_nodes(n_nodes)
nlabels = []  # node labels
......
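dgl_graph(([], [])) builds an empty graph that the loader then grows with add_nodes/add_edges; roughly:

>>> import dgl
>>> g = dgl.graph(([], []))    # empty graph: 0 nodes, 0 edges
>>> g.add_nodes(5)
>>> g.add_edges([0, 1], [1, 2])
>>> g.number_of_nodes(), g.number_of_edges()
(5, 2)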
@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None):
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
and edge features.
- >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3])
+ >>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
- >>> g2 = dgl.graph(([0, 2], [2, 3])
+ >>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata["e"] = th.ones(2, 4)
Save Graphs into file
......
@@ -6,7 +6,7 @@ import networkx as nx
from .. import backend as F
from .dgl_dataset import DGLDataset
from .utils import deprecate_property
- from ..convert import graph as dgl_graph
+ from ..convert import from_networkx
__all__ = ['KarateClubDataset', 'KarateClub']
@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset):
label = np.asarray(
[kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
label = F.tensor(label)
- g = dgl_graph(kc_graph)
+ g = from_networkx(kc_graph)
g.ndata['label'] = label
self._graph = g
self._data = [g]
......
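The dataset now goes through the NetworkX converter directly. As a quick check of the same conversion outside the dataset class:

>>> import dgl
>>> import networkx as nx
>>> kc = nx.karate_club_graph()
>>> g = dgl.from_networkx(kc)
>>> g.number_of_nodes()
34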
@@ -6,7 +6,7 @@ import numpy as np
from .dgl_dataset import DGLDataset
from .utils import save_graphs, load_graphs, makedirs
from .. import backend as F
- from ..convert import graph as dgl_graph
+ from ..convert import from_networkx
from ..transform import add_self_loop
__all__ = ['MiniGCDataset']
@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset):
# preprocess
for i in range(self.num_graphs):
# convert to DGLGraph, and add self loops
- self.graphs[i] = add_self_loop(dgl_graph(self.graphs[i]))
+ self.graphs[i] = add_self_loop(from_networkx(self.graphs[i]))
self.labels = F.tensor(np.array(self.labels).astype(np.int))
def _gen_cycle(self, n):
......
@@ -300,7 +300,7 @@ class RDFGraphDataset(DGLBuiltinDataset):
# convert to heterograph
if self.verbose:
print('Convert to heterograph ...')
- hg = dgl.to_hetero(g,
+ hg = dgl.to_heterogeneous(g,
ntypes,
etypes,
metagraph=mg)
......
@@ -8,7 +8,7 @@ import os
from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property
from .. import backend as F
- from ..convert import graph as dgl_graph
+ from ..convert import from_scipy
class RedditDataset(DGLBuiltinDataset):
@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset):
# graph
coo_adj = sp.load_npz(os.path.join(
self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str)))
- self._graph = dgl_graph(coo_adj)
+ self._graph = from_scipy(coo_adj)
# features and labels
reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
features = reddit_data["feature"]
......
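Likewise, SciPy sparse adjacency matrices now go through from_scipy instead of the generic constructor; a small illustrative example:

>>> import dgl
>>> import scipy.sparse as sp
>>> spmat = sp.coo_matrix(([1, 1, 1], ([0, 1, 2], [1, 2, 0])), shape=(3, 3))
>>> g = dgl.from_scipy(spmat)
>>> g.number_of_edges()
3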
@@ -8,7 +8,7 @@ import numpy.random as npr
import scipy as sp
from .dgl_dataset import DGLDataset
- from ..convert import graph as dgl_graph
+ from ..convert import from_scipy
from .. import batch
from .utils import save_info, save_graphs, load_info, load_graphs
@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset):
pq = [generator() for _ in range(self._n_graphs)]
else:
raise RuntimeError()
- self._graphs = [dgl_graph(sbm(self._n_communities, self._block_size, *x)) for x in pq]
+ self._graphs = [from_scipy(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs]
in_degrees = lambda g: g.in_degrees().float()
self._graph_degrees = [in_degrees(g) for g in self._graphs]
......
@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
np.genfromtxt(self._file_path("graph_labels"), dtype=int))
- g = dgl_graph([])
+ g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int))
- g = dgl_graph([])
+ g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......
@@ -596,8 +596,11 @@ class EdgeCollator(Collator):
'graph has multiple or no edge types; '\
'please return a dict in negative sampler.'
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
+ # Get dtype from a tuple of tensors
+ dtype = F.dtype(list(neg_srcdst.values())[0][0])
neg_edges = {
- etype: neg_srcdst.get(etype, []) for etype in self.g.canonical_etypes}
+ etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
+ for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
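The point of the new default is that heterograph needs typed ID tensors even for relations with no sampled negatives; a plain empty Python list would lose the dtype. A standalone sketch of the same idea (the names and relations here are illustrative, not the collator's internals):

>>> import dgl
>>> import torch as th
>>> neg_src = th.tensor([0, 1], dtype=th.int64)
>>> neg_dst = th.tensor([2, 2], dtype=th.int64)
>>> neg_srcdst = {('user', 'follows', 'user'): (neg_src, neg_dst)}
>>> empty = (th.tensor([], dtype=neg_src.dtype), th.tensor([], dtype=neg_src.dtype))
>>> neg_edges = {etype: neg_srcdst.get(etype, empty)
...              for etype in [('user', 'follows', 'user'), ('user', 'plays', 'game')]}
>>> neg_g = dgl.heterograph(neg_edges, {'user': 3, 'game': 2})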
@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols),
- num_nodes=num_nodes, validate=False,
- formats=formats,
+ num_nodes=num_nodes,
idtype=idtype, device=device)
- return g
+ return g.formats(formats)
- def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
+ def rand_bipartite(utype, etype, vtype,
+                    num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and
@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
Parameters
----------
+ utype : str, optional
+     The name of the source node type.
+ etype : str, optional
+     The name of the edge type.
+ vtype : str, optional
+     The name of the destination node type.
num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
num_dst_nodes : int
@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
- g = convert.bipartite((rows, cols),
- num_nodes=(num_src_nodes, num_dst_nodes), validate=False,
- idtype=idtype, device=device,
- formats=formats)
- return g
+ g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
+ {utype: num_src_nodes, vtype: num_dst_nodes},
+ idtype=idtype, device=device)
+ return g.formats(formats)
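With the new signature, callers name the bipartite relation explicitly and the graph is built through heterograph instead of the removed bipartite constructor. Assuming the function stays exported as dgl.rand_bipartite, usage looks roughly like:

>>> import dgl
>>> g = dgl.rand_bipartite('user', 'plays', 'game', 50, 100, 30)
>>> g.number_of_nodes('user'), g.number_of_nodes('game'), g.number_of_edges()
(50, 100, 30)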
This diff is collapsed.
@@ -547,6 +547,9 @@ class HeteroGraphIndex(ObjectBase):
"""
if order is None:
order = ""
+ elif order not in ['srcdst', 'eid']:
+     raise DGLError("Expect order to be one of None, 'srcdst', 'eid', "
+                    "got {}".format(order))
edge_array = _CAPI_DGLHeteroEdges(self, int(etype), order)
src = F.from_dgl_nd(edge_array(0))
dst = F.from_dgl_nd(edge_array(1))
......
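The new check surfaces typos in the order argument as a DGLError instead of silently passing an arbitrary string to the C API. At the user level the effect is roughly:

>>> import dgl
>>> g = dgl.graph(([0, 2, 1], [1, 1, 2]))
>>> src, dst, eid = g.edges(form='all', order='eid')   # valid
>>> g.edges(form='all', order='id')                    # raises dgl.DGLError after this change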
@@ -76,8 +76,9 @@ class GatedGraphConv(nn.Block):
is the output feature size.
"""
with graph.local_scope():
- assert graph.is_homogeneous(), \
+ assert graph.is_homogeneous, \
- "not a homograph; convert it with to_homo and pass in the edge type as argument"
+ "not a homogeneous graph; convert it with to_homogeneous " \
+ "and pass in the edge type as argument"
zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]),
ctx=feat.context)
feat = nd.concat(feat, zero_pad, dim=-1)
......
@@ -229,8 +229,9 @@ class RelGraphConv(gluon.Block):
mx.ndarray.NDArray
New node features.
"""
- assert g.is_homogeneous(), \
+ assert g.is_homogeneous, \
- "not a homograph; convert it with to_homo and pass in the edge type as argument"
+ "not a homogeneous graph; convert it with to_homogeneous " \
+ "and pass in the edge type as argument"
with g.local_scope():
g.ndata['h'] = x
g.edata['type'] = etypes
......
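The reworded assertion matches the renamed converter: these relational modules still expect a homogeneous graph plus a per-edge type tensor. A sketch of the intended pattern, using the PyTorch variant and made-up feature sizes since only the MXNet error message appears in this hunk:

>>> import dgl
>>> import torch as th
>>> from dgl.nn import RelGraphConv
>>> hg = dgl.heterograph({
...     ('user', 'follows', 'user'): ([0, 1], [1, 2]),
...     ('user', 'plays', 'game'): ([0, 2], [0, 1])})
>>> g = dgl.to_homogeneous(hg)          # RelGraphConv wants a homogeneous graph
>>> etypes = g.edata[dgl.ETYPE]         # per-edge type ids produced by the conversion
>>> conv = RelGraphConv(in_feat=8, out_feat=4, num_rels=len(hg.canonical_etypes))
>>> h = conv(g, th.randn(g.number_of_nodes(), 8), etypes)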