Unverified Commit be444e52 authored by Mufei Li, committed by GitHub

[Doc/Feature] Refactor, doc update and behavior fix for graphs (#1983)



* Update graph

* Fix for dgl.graph

* from_scipy

* Replace canonical_etypes with relations

* from_networkx

* Update for hetero_from_relations

* Roll back the change of canonical_etypes to relations

* heterograph

* bipartite

* Update doc

* Fix lint

* Fix test cases

* Use DGLError

* rewrite sanity checks

* delete unnecessary checks

Co-authored-by: xiang song (charlie.song) <classicxsong@gmail.com>
Co-authored-by: Minjie Wang <wmjlyjemaine@gmail.com>
Co-authored-by: Quan Gan <coin2028@hotmail.com>
parent 0afc3cf8
......@@ -66,7 +66,6 @@ def main(args):
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
......@@ -98,7 +97,7 @@ def main(args):
category_id = i
# edge type and normalization factor
g = dgl.to_homo(hg)
g = dgl.to_homogeneous(hg, edata=['norm'])
# check cuda
if args.gpu < 0:
......
......@@ -89,15 +89,15 @@ ToBlock(HeteroGraphPtr graph, const std::vector<IdArray> &rhs_nodes, bool includ
* * \c count : The array of edge occurrences per edge type.
* * \c edge_map : The mapping from original edge IDs to new edge IDs per edge type.
*
* \note Example: consider the following graph:
* \note Example: consider a graph with the following edges
*
* g = dgl.graph([(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)])
* [(0, 1), (1, 3), (2, 2), (1, 3), (1, 4), (1, 4)]
*
* Then ToSimpleGraph(g) would yield the following elements:
*
* * The first element would be the simple graph itself:
* * The first element would be the simple graph itself with the following edges
*
* simple_g = dgl.graph([(0, 1), (1, 3), (1, 4), (2, 2)])
* [(0, 1), (1, 3), (1, 4), (2, 2)]
*
* * The second element is an array \c count. \c count[i] stands for the number of edges
* connecting simple_g.src[i] and simple_g.dst[i] in the original graph.
......
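For reference, the Python-level counterpart of the example in the doc comment above would look roughly like the sketch below. It assumes `dgl.to_simple` with a `return_counts` argument that records per-edge multiplicities in the returned graph's edge data; treat the exact keyword as an assumption rather than a guarantee.

>>> import dgl
>>> import torch as th
>>> # The multigraph from the doc comment: duplicate (1, 3) and (1, 4) edges.
>>> g = dgl.graph((th.tensor([0, 1, 2, 1, 1, 1]), th.tensor([1, 3, 2, 3, 4, 4])))
>>> # Deduplicate parallel edges; 'count' stores how many original edges each kept edge represents.
>>> sg = dgl.to_simple(g, return_counts='count')
>>> sg.edata['count']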
......@@ -4038,6 +4038,7 @@ class DGLGraph(DGLBaseGraph):
self._node_frame = old_nframe
self._edge_frame = old_eframe
@property
def is_homogeneous(self):
"""Return if the graph is homogeneous."""
return True
......
......@@ -397,7 +397,7 @@ def unbatch(g, node_split=None, edge_split=None):
for i in range(num_split)]
# Create graphs
gs = [convert.heterograph(edge_dict, num_nodes_dict, validate=True, idtype=g.idtype)
gs = [convert.heterograph(edge_dict, num_nodes_dict, idtype=g.idtype)
for edge_dict, num_nodes_dict in zip(edge_dict_per, num_nodes_dict_per)]
# Unbatch node features
......
This diff is collapsed.
......@@ -19,7 +19,7 @@ from .. import convert
from .. import batch
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import to_networkx
from ..convert import from_networkx, to_networkx
backend = os.environ.get('DGLBACKEND', 'pytorch')
......@@ -119,7 +119,7 @@ class CitationGraphDataset(DGLBuiltinDataset):
test_mask = _sample_mask(idx_test, labels.shape[0])
self._graph = graph
g = dgl_graph(graph)
g = from_networkx(graph)
g.ndata['train_mask'] = generate_mask_tensor(train_mask)
g.ndata['val_mask'] = generate_mask_tensor(val_mask)
......@@ -794,13 +794,13 @@ class CoraBinary(DGLBuiltinDataset):
for line in f.readlines():
if line.startswith('graph'):
if len(elist) != 0:
self.graphs.append(dgl_graph(elist))
self.graphs.append(dgl_graph(tuple(zip(*elist))))
elist = []
else:
u, v = line.strip().split(' ')
elist.append((int(u), int(v)))
if len(elist) != 0:
self.graphs.append(dgl_graph(elist))
self.graphs.append(dgl_graph(tuple(zip(*elist))))
with open("{}/pmpds.pkl".format(root), 'rb') as f:
self.pmpds = _pickle_load(f)
self.labels = []
......
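The `tuple(zip(*elist))` pattern above converts an edge list of `(u, v)` pairs into the `(src, dst)` pair-of-sequences form that the graph constructor now expects; a minimal illustration:

>>> import dgl
>>> elist = [(0, 1), (1, 2), (2, 0)]
>>> src, dst = tuple(zip(*elist))  # src = (0, 1, 2), dst = (1, 2, 0)
>>> g = dgl.graph((src, dst))      # dgl.graph is what the file above imports as dgl_graph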
......@@ -157,7 +157,7 @@ class GINDataset(DGLBuiltinDataset):
self.labels.append(self.glabel_dict[glabel])
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(n_nodes)
nlabels = [] # node labels
......
......@@ -86,8 +86,8 @@ def save_graphs(filename, g_list, labels=None):
Create :code:`DGLGraph`/:code:`DGLHeteroGraph` objects and initialize node
and edge features.
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3])
>>> g2 = dgl.graph(([0, 2], [2, 3])
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata["e"] = th.ones(2, 4)
Save Graphs into file
......
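To round out the corrected docstring example above, saving and reloading the two graphs would look roughly as follows (a sketch; the file name is arbitrary):

>>> import dgl
>>> import torch as th
>>> g1 = dgl.graph(([0, 1, 2], [1, 2, 3]))
>>> g2 = dgl.graph(([0, 2], [2, 3]))
>>> g2.edata['e'] = th.ones(2, 4)
>>> dgl.save_graphs('./two_graphs.bin', [g1, g2])
>>> graphs, label_dict = dgl.load_graphs('./two_graphs.bin')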
......@@ -6,7 +6,7 @@ import networkx as nx
from .. import backend as F
from .dgl_dataset import DGLDataset
from .utils import deprecate_property
from ..convert import graph as dgl_graph
from ..convert import from_networkx
__all__ = ['KarateClubDataset', 'KarateClub']
......@@ -56,7 +56,7 @@ class KarateClubDataset(DGLDataset):
label = np.asarray(
[kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
label = F.tensor(label)
g = dgl_graph(kc_graph)
g = from_networkx(kc_graph)
g.ndata['label'] = label
self._graph = g
self._data = [g]
......
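The conversion the dataset now relies on can be reproduced standalone; a small sketch using the NetworkX built-in karate club graph:

>>> import networkx as nx
>>> import dgl
>>> kc_graph = nx.karate_club_graph()
>>> g = dgl.from_networkx(kc_graph)
>>> g.num_nodes()  # 34 club members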
......@@ -6,7 +6,7 @@ import numpy as np
from .dgl_dataset import DGLDataset
from .utils import save_graphs, load_graphs, makedirs
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import from_networkx
from ..transform import add_self_loop
__all__ = ['MiniGCDataset']
......@@ -147,7 +147,7 @@ class MiniGCDataset(DGLDataset):
# preprocess
for i in range(self.num_graphs):
# convert to DGLGraph, and add self loops
self.graphs[i] = add_self_loop(dgl_graph(self.graphs[i]))
self.graphs[i] = add_self_loop(from_networkx(self.graphs[i]))
self.labels = F.tensor(np.array(self.labels).astype(np.int))
def _gen_cycle(self, n):
......
......@@ -300,10 +300,10 @@ class RDFGraphDataset(DGLBuiltinDataset):
# convert to heterograph
if self.verbose:
print('Convert to heterograph ...')
hg = dgl.to_hetero(g,
ntypes,
etypes,
metagraph=mg)
hg = dgl.to_heterogeneous(g,
ntypes,
etypes,
metagraph=mg)
if self.verbose:
print('#Node types:', len(hg.ntypes))
print('#Canonical edge types:', len(hg.etypes))
......
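The rename from `dgl.to_hetero` to `dgl.to_heterogeneous` mirrors `dgl.to_homogeneous`; a minimal round-trip sketch with toy node and edge type names:

>>> import dgl
>>> hg = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2]),
...                       ('user', 'plays', 'game'): ([0, 1], [0, 1])})
>>> g = dgl.to_homogeneous(hg)                           # type info kept in g.ndata / g.edata
>>> hg2 = dgl.to_heterogeneous(g, hg.ntypes, hg.etypes)  # recover the typed view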
......@@ -8,7 +8,7 @@ import os
from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs, deprecate_property
from .. import backend as F
from ..convert import graph as dgl_graph
from ..convert import from_scipy
class RedditDataset(DGLBuiltinDataset):
......@@ -140,7 +140,7 @@ class RedditDataset(DGLBuiltinDataset):
# graph
coo_adj = sp.load_npz(os.path.join(
self.raw_path, "reddit{}_graph.npz".format(self._self_loop_str)))
self._graph = dgl_graph(coo_adj)
self._graph = from_scipy(coo_adj)
# features and labels
reddit_data = np.load(os.path.join(self.raw_path, "reddit_data.npz"))
features = reddit_data["feature"]
......
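`dgl.from_scipy` builds a graph directly from a SciPy sparse adjacency matrix; a toy sketch with a random matrix standing in for the Reddit adjacency:

>>> import scipy.sparse as sp
>>> import dgl
>>> coo_adj = sp.random(100, 100, density=0.05, format='coo')
>>> g = dgl.from_scipy(coo_adj)
>>> g.num_nodes(), g.num_edges()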
......@@ -8,7 +8,7 @@ import numpy.random as npr
import scipy as sp
from .dgl_dataset import DGLDataset
from ..convert import graph as dgl_graph
from ..convert import from_scipy
from .. import batch
from .utils import save_info, save_graphs, load_info, load_graphs
......@@ -124,7 +124,7 @@ class SBMMixtureDataset(DGLDataset):
pq = [generator() for _ in range(self._n_graphs)]
else:
raise RuntimeError()
self._graphs = [dgl_graph(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._graphs = [from_scipy(sbm(self._n_communities, self._block_size, *x)) for x in pq]
self._line_graphs = [g.line_graph(backtracking=False) for g in self._graphs]
in_degrees = lambda g: g.in_degrees().float()
self._graph_degrees = [in_degrees(g) for g in self._graphs]
......
......@@ -100,7 +100,7 @@ class LegacyTUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
np.genfromtxt(self._file_path("graph_labels"), dtype=int))
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......@@ -296,7 +296,7 @@ class TUDataset(DGLBuiltinDataset):
DS_graph_labels = self._idx_from_zero(
loadtxt(self._file_path("graph_labels"), delimiter=",").astype(int))
g = dgl_graph([])
g = dgl_graph(([], []))
g.add_nodes(int(DS_edge_list.max()) + 1)
g.add_edges(DS_edge_list[:, 0], DS_edge_list[:, 1])
......
......@@ -596,8 +596,11 @@ class EdgeCollator(Collator):
'graph has multiple or no edge types; '\
'please return a dict in negative sampler.'
neg_srcdst = {self.g.canonical_etypes[0]: neg_srcdst}
# Get dtype from a tuple of tensors
dtype = F.dtype(list(neg_srcdst.values())[0][0])
neg_edges = {
etype: neg_srcdst.get(etype, []) for etype in self.g.canonical_etypes}
etype: neg_srcdst.get(etype, (F.tensor([], dtype), F.tensor([], dtype)))
for etype in self.g.canonical_etypes}
neg_pair_graph = heterograph(
neg_edges, {ntype: self.g.number_of_nodes(ntype) for ntype in self.g.ntypes})
......
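The fallback added above fills in empty source/destination tensors for any canonical edge type the negative sampler did not return, so `heterograph` still sees every relation. Expressed with plain PyTorch tensors instead of the backend wrappers (toy type names, purely illustrative):

>>> import torch as th
>>> canonical_etypes = [('user', 'follows', 'user'), ('user', 'plays', 'game')]
>>> neg_srcdst = {('user', 'follows', 'user'): (th.tensor([0, 1]), th.tensor([2, 2]))}
>>> dtype = list(neg_srcdst.values())[0][0].dtype
>>> neg_edges = {etype: neg_srcdst.get(etype, (th.tensor([], dtype=dtype), th.tensor([], dtype=dtype)))
...              for etype in canonical_etypes}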
......@@ -38,12 +38,12 @@ def rand_graph(num_nodes, num_edges, idtype=F.int64, device=F.cpu(),
rows = F.copy_to(F.astype(eids / num_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_nodes, idtype), device)
g = convert.graph((rows, cols),
num_nodes=num_nodes, validate=False,
formats=formats,
num_nodes=num_nodes,
idtype=idtype, device=device)
return g
return g.formats(formats)
def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
def rand_bipartite(utype, etype, vtype,
num_src_nodes, num_dst_nodes, num_edges,
idtype=F.int64, device=F.cpu(),
formats=['csr', 'coo', 'csc']):
"""Generate a random bipartite graph of the given number of src/dst nodes and
......@@ -53,6 +53,12 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
Parameters
----------
utype : str, optional
The name of the source node type.
etype : str, optional
The name of the edge type.
vtype : str, optional
The name of the destination node type.
num_src_nodes : int
The number of source nodes, the :math:`|U|` in :math:`G=(U,V,E)`.
num_dst_nodes : int
......@@ -75,8 +81,7 @@ def rand_bipartite(num_src_nodes, num_dst_nodes, num_edges,
eids = random.choice(num_src_nodes * num_dst_nodes, num_edges, replace=False)
rows = F.copy_to(F.astype(eids / num_dst_nodes, idtype), device)
cols = F.copy_to(F.astype(eids % num_dst_nodes, idtype), device)
g = convert.bipartite((rows, cols),
num_nodes=(num_src_nodes, num_dst_nodes), validate=False,
idtype=idtype, device=device,
formats=formats)
return g
g = convert.heterograph({(utype, etype, vtype): (rows, cols)},
{utype: num_src_nodes, vtype: num_dst_nodes},
idtype=idtype, device=device)
return g.formats(formats)
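With the new signature, the source, edge, and destination type names come first; a usage sketch with made-up type names:

>>> import dgl
>>> g = dgl.rand_bipartite('user', 'buys', 'game', 50, 100, 10)
>>> g.num_nodes('user'), g.num_nodes('game'), g.num_edges()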
This diff is collapsed.
......@@ -547,6 +547,9 @@ class HeteroGraphIndex(ObjectBase):
"""
if order is None:
order = ""
elif order not in ['srcdst', 'eid']:
raise DGLError("Expect order to be one of None, 'srcdst', 'eid', "
"got {}".format(order))
edge_array = _CAPI_DGLHeteroEdges(self, int(etype), order)
src = F.from_dgl_nd(edge_array(0))
dst = F.from_dgl_nd(edge_array(1))
......
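At the Python level, this check backs the `order` argument of `DGLGraph.edges`, which accepts `None`, `'srcdst'`, or `'eid'`; anything else now raises `DGLError`. A quick sketch:

>>> import dgl
>>> g = dgl.graph(([0, 2, 1], [1, 0, 2]))
>>> g.edges(order='eid')     # edges in edge ID (insertion) order
>>> g.edges(order='srcdst')  # edges sorted by source, then destination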
......@@ -76,8 +76,9 @@ class GatedGraphConv(nn.Block):
is the output feature size.
"""
with graph.local_scope():
assert graph.is_homogeneous(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
assert graph.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
zero_pad = nd.zeros((feat.shape[0], self._out_feats - feat.shape[1]),
ctx=feat.context)
feat = nd.concat(feat, zero_pad, dim=-1)
......
......@@ -229,8 +229,9 @@ class RelGraphConv(gluon.Block):
mx.ndarray.NDArray
New node features.
"""
assert g.is_homogeneous(), \
"not a homograph; convert it with to_homo and pass in the edge type as argument"
assert g.is_homogeneous, \
"not a homogeneous graph; convert it with to_homogeneous " \
"and pass in the edge type as argument"
with g.local_scope():
g.ndata['h'] = x
g.edata['type'] = etypes
......
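The updated assertion messages point users at `dgl.to_homogeneous`; a sketch of the conversion they suggest, including the per-edge type IDs that would then be passed as the edge type argument (toy graph, illustrative only):

>>> import dgl
>>> hg = dgl.heterograph({('user', 'follows', 'user'): ([0, 1], [1, 2]),
...                       ('user', 'plays', 'game'): ([0, 1], [0, 1])})
>>> g = dgl.to_homogeneous(hg)
>>> etypes = g.edata[dgl.ETYPE]  # integer edge-type IDs, one per edge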