Unverified Commit 47392596 authored by GaiYu0's avatar GaiYu0 Committed by GitHub

Merge pull request #72 from jermainewang/line-graph

[MODEL] CDGNN and line graph
parents 7603b8c3 8738ebfa
Community Detection with Graph Neural Networks (CDGNN)
============
Paper link: [https://arxiv.org/abs/1705.08415](https://arxiv.org/abs/1705.08415)
Author's code repo: [https://github.com/joanbruna/GNN_community](https://github.com/joanbruna/GNN_community)
This folder contains a DGL implementation of the CDGNN model.
An experiment on the Stochastic Block Model with default settings can be run with
```bash
python train.py
```
An experiment on the Stochastic Block Model with customized settings can be run with
```bash
python train.py --batch-size BATCH_SIZE --gpu GPU --n-communities N_COMMUNITIES --n-features N_FEATURES --n-graphs N_GRAPHS --n-iterations N_ITERATIONS --n-layers N_LAYERS --n-nodes N_NODES --model-path MODEL_PATH --radius RADIUS
```
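For reference, a minimal sketch of restoring the saved checkpoint after training (assuming the default flag values above and that `gnn.py` and the `model` file sit in this folder):

```python
import torch as th
import gnn

# Mirror the architecture train.py builds from its default flags.
n_layers, n_features, n_communities, radius = 30, 2, 2, 3
feats = [1] + [n_features] * n_layers + [n_communities]
model = gnn.GNN(feats, radius, n_communities)
model.load_state_dict(th.load('model'))  # --model-path default
model.eval()
```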
@@ -3,237 +3,93 @@ Supervised Community Detection with Hierarchical Graph Neural Networks
https://arxiv.org/abs/1705.08415

Deviations from paper:
-- Addition of global aggregation operator.
-- Message passing is equivalent to `A^j \cdot X`, instead of `\min(1, A^j) \cdot X`.
+- Pm Pd
"""
-# TODO self-loop?
-# TODO in-place edit of node_reprs/edge_reprs in message_func/update_func?
-# TODO batch-norm
import copy
import itertools

-import dgl.graph as G
+import dgl
+import dgl.function as fn
import networkx as nx
import torch as th
import torch.nn as nn
+import torch.nn.functional as F

-class GLGModule(nn.Module):
-    __SHADOW__ = 'shadow'
+class GNNModule(nn.Module):
    def __init__(self, in_feats, out_feats, radius):
        super().__init__()
+        self.out_feats = out_feats
        self.radius = radius

-        new_linear = lambda: nn.Linear(in_feats, out_feats)
+        new_linear = lambda: nn.Linear(in_feats, out_feats * 2)
        new_module_list = lambda: nn.ModuleList([new_linear() for i in range(radius)])

-        self.theta_x, self.theta_y, self.theta_deg, self.theta_global = \
-            new_linear(), new_linear(), new_linear(), new_linear()
+        self.theta_x, self.theta_deg, self.theta_y = \
+            new_linear(), new_linear(), new_linear()
        self.theta_list = new_module_list()

-        self.gamma_x, self.gamma_y, self.gamma_deg, self.gamma_global = \
-            new_linear(), new_linear(), new_linear(), new_linear()
+        self.gamma_y, self.gamma_deg, self.gamma_x = \
+            new_linear(), new_linear(), new_linear()
        self.gamma_list = new_module_list()

-    @staticmethod
-    def copy(which):
-        if which == 'src':
-            return lambda src, trg, _: src.copy()
-        elif which == 'trg':
-            return lambda src, trg, _: trg.copy()
-
-    @staticmethod
-    def aggregate(msg_fld, trg_fld, normalize=False):
-        def a(node_reprs, edge_reprs):
-            node_reprs = node_reprs.copy()
-            node_reprs[trg_fld] = sum(msg[msg_fld] for msg in edge_reprs)
-            if normalize:
-                node_reprs[trg_fld] /= len(edge_reprs)
-            return node_reprs
-        return a
-
-    @staticmethod
-    def pull(msg_fld, trg_fld):
-        def p(node_reprs, edge_reprs):
-            node_reprs = node_reprs.copy()
-            node_reprs[trg_fld] = edge_reprs[0][msg_fld]
-            return node_reprs
-        return p
-
-    def local_aggregate(self, g):
-        def step():
-            g.register_message_func(self.copy('src'), g.edges)
-            g.register_update_func(self.aggregate('x', 'x'), g.nodes)
-            g.update_all()
-        step()
-        for reprs in g.nodes.values():
-            reprs[0] = reprs['x']
-        for i in range(1, self.radius):
-            for j in range(2 ** (i - 1)):
-                step()
-            for reprs in g.nodes.values():
-                reprs[i] = reprs['x']
-
-    @staticmethod
-    def global_aggregate(g):
-        shadow = GLGModule.__SHADOW__
-        copy, aggregate, pull = GLGModule.copy, GLGModule.aggregate, GLGModule.pull
-        node_list = list(g.nodes)
-        uv_list = [(node, shadow) for node in g.nodes]
-        vu_list = [(shadow, node) for node in g.nodes]
-        g.add_node(shadow)  # TODO context manager
-        tuple(itertools.starmap(g.add_edge, uv_list))
-        g.register_message_func(copy('src'), uv_list)
-        g.register_update_func(aggregate('x', 'global', normalize=True), (shadow,))
-        g.update_to(shadow)
-        tuple(itertools.starmap(g.add_edge, vu_list))
-        g.register_message_func(copy('src'), vu_list)
-        g.register_update_func(pull('global', 'global'), node_list)
-        g.update_from(shadow)
-        g.remove_node(shadow)
-
-    @staticmethod
-    def multiply_by_degree(g):
-        g.register_message_func(lambda *args: None, g.edges)
-        def update_func(node_reprs, _):
-            node_reprs = node_reprs.copy()
-            node_reprs['deg'] = node_reprs['x'] * node_reprs['degree']
-            return node_reprs
-        g.register_update_func(update_func, g.nodes)
-        g.update_all()
-
-    @staticmethod
-    def message_func(src, trg, _):
-        return {'y' : src['x']}
-
-    def update_func(self, which):
-        if which == 'node':
-            linear_x, linear_y, linear_deg, linear_global = \
-                self.theta_x, self.theta_y, self.theta_deg, self.theta_global
-            linear_list = self.theta_list
-        elif which == 'edge':
-            linear_x, linear_y, linear_deg, linear_global = \
-                self.gamma_x, self.gamma_y, self.gamma_deg, self.gamma_global
-            linear_list = self.gamma_list
-        def u(node_reprs, edge_reprs):
-            edge_reprs = filter(lambda x: x is not None, edge_reprs)
-            y = sum(x['y'] for x in edge_reprs)
-            node_reprs = node_reprs.copy()
-            node_reprs['x'] = linear_x(node_reprs['x']) \
-                              + linear_y(y) \
-                              + linear_deg(node_reprs['deg']) \
-                              + linear_global(node_reprs['global']) \
-                              + sum(linear(node_reprs[i]) \
-                                    for i, linear in enumerate(linear_list))
-            return node_reprs
-        return u
-
-    def forward(self, g, lg, glg):
-        self.local_aggregate(g)
-        self.local_aggregate(lg)
-        self.global_aggregate(g)
-        self.global_aggregate(lg)
-        self.multiply_by_degree(g)
-        self.multiply_by_degree(lg)
-        # TODO efficiency
-        for node, reprs in g.nodes.items():
-            glg.nodes[node].update(reprs)
-        for node, reprs in lg.nodes.items():
-            glg.nodes[node].update(reprs)
-        glg.register_message_func(self.message_func, glg.edges)
-        glg.register_update_func(self.update_func('node'), g.nodes)
-        glg.register_update_func(self.update_func('edge'), lg.nodes)
-        glg.update_all()
-        # TODO efficiency
-        for node, reprs in g.nodes.items():
-            reprs.update(glg.nodes[node])
-        for node, reprs in lg.nodes.items():
-            reprs.update(glg.nodes[node])
-
-class GNNModule(nn.Module):
-    def __init__(self, in_feats, out_feats, order, radius):
-        super().__init__()
-        self.module_list = nn.ModuleList([GLGModule(in_feats, out_feats, radius)
-                                          for i in range(order)])
-
-    def forward(self, pairs, fusions):
-        for module, (g, lg), glg in zip(self.module_list, pairs, fusions):
-            module(g, lg, glg)
-        for lhs, rhs in zip(pairs[:-1], pairs[1:]):
-            for node, reprs in lhs[1].nodes.items():
-                x_rhs = reprs['x']
-                reprs['x'] = x_rhs + rhs[0].nodes[node]['x']
-                rhs[0].nodes[node]['x'] += x_rhs
+        self.bn_x = nn.BatchNorm1d(out_feats)
+        self.bn_y = nn.BatchNorm1d(out_feats)
+
+    def aggregate(self, g, z):
+        z_list = []
+        g.set_n_repr(z)
+        g.update_all(fn.copy_src(), fn.sum())
+        z_list.append(g.get_n_repr())
+        for i in range(self.radius - 1):
+            for j in range(2 ** i):
+                g.update_all(fn.copy_src(), fn.sum())
+            z_list.append(g.get_n_repr())
+        return z_list
+
+    def forward(self, g, lg, x, y, deg_g, deg_lg, eid2nid):
+        xy = F.embedding(eid2nid, x)
+
+        x_list = [theta(z) for theta, z in zip(self.theta_list, self.aggregate(g, x))]
+        g.set_e_repr(y)
+        g.update_all(fn.copy_edge(), fn.sum())
+        yx = g.get_n_repr()
+        x = self.theta_x(x) + self.theta_deg(deg_g * x) + sum(x_list) + self.theta_y(yx)
+        x = self.bn_x(x[:, :self.out_feats] + F.relu(x[:, self.out_feats:]))
+
+        y_list = [gamma(z) for gamma, z in zip(self.gamma_list, self.aggregate(lg, y))]
+        lg.set_n_repr(xy)
+        lg.update_all(fn.copy_src(), fn.sum())
+        xy = lg.get_n_repr()
+        y = self.gamma_y(y) + self.gamma_deg(deg_lg * y) + sum(y_list) + self.gamma_x(xy)
+        y = self.bn_y(y[:, :self.out_feats] + F.relu(y[:, self.out_feats:]))
+
+        return x, y

class GNN(nn.Module):
-    def __init__(self, feats, order, radius, n_classes):
-        super().__init__()
-        self.order = order
-        self.linear = nn.Linear(feats[-1], n_classes)
-        self.module_list = nn.ModuleList([GNNModule(in_feats, out_feats, order, radius)
-                                          for in_feats, out_feats in zip(feats[:-1], feats[1:])])
-
-    @staticmethod
-    def line_graph(g):
-        lg = nx.line_graph(g)
-        glg = nx.DiGraph()
-        glg.add_nodes_from(g.nodes)
-        glg.add_nodes_from(lg.nodes)
-        for u, v in g.edges:
-            glg.add_edge(u, (u, v))
-            glg.add_edge((u, v), u)
-            glg.add_edge(v, (u, v))
-            glg.add_edge((u, v), v)
-        return lg, glg
-
-    @staticmethod
-    def nx2dgl(g):
-        deg_dict = dict(nx.degree(g))
-        z = sum(deg_dict.values())
-        dgl_g = G.DGLGraph(g)
-        for node, reprs in dgl_g.nodes.items():
-            reprs['degree'] = deg_dict[node]
-            reprs['x'] = th.full((1, 1), reprs['degree'] / z)
-            reprs.update(g.nodes[node])
-        return dgl_g
-
-    def forward(self, g):
+    def __init__(self, feats, radius, n_classes):
        """
        Parameters
        ----------
        g : networkx.DiGraph
        """
-        pair_list, glg_list = [], []
-        dgl_g = self.nx2dgl(g)
-        origin = dgl_g
-        for i in range(self.order):
-            lg, glg = self.line_graph(g)
-            dgl_lg = self.nx2dgl(lg)
-            pair_list.append((dgl_g, copy.deepcopy(dgl_lg)))
-            glg_list.append(G.DGLGraph(glg))
-            g = lg
-            dgl_g = dgl_lg
-        for module in self.module_list:
-            module(pair_list, glg_list)
-        return self.linear(th.cat([reprs['x'] for reprs in origin.nodes.values()], 0))
+        super(GNN, self).__init__()
+        self.linear = nn.Linear(feats[-1], n_classes)
+        self.module_list = nn.ModuleList([GNNModule(m, n, radius)
+                                          for m, n in zip(feats[:-1], feats[1:])])
+
+    def forward(self, g, lg, deg_g, deg_lg, eid2nid):
+        def normalize(x):
+            x = x - th.mean(x, 0)
+            x = x / th.sqrt(th.mean(x * x, 0))
+            return x
+
+        x = normalize(deg_g)
+        y = normalize(deg_lg)
+        for module in self.module_list:
+            x, y = module(g, lg, x, y, deg_g, deg_lg, eid2nid)
+        return self.linear(x)
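In the new `GNNModule.aggregate`, entry j of the returned list holds the 2^j-hop aggregation A^(2^j) x: after the first round of message passing, outer iteration i runs 2^i additional rounds, doubling the adjacency power each time. A standalone dense-matrix sketch of the same recurrence (plain PyTorch stand-in, no DGL; `A` and `x` here are hypothetical inputs):

```python
import torch as th

def aggregate_dense(A, x, radius):
    """Dense mirror of GNNModule.aggregate: returns [A x, A^2 x, A^4 x, ...]."""
    z = A @ x                        # one round of message passing
    z_list = [z]
    for i in range(radius - 1):
        for _ in range(2 ** i):      # 2**i more rounds squares the power
            z = A @ z
        z_list.append(z)
    return z_list

A = th.rand(4, 4)                    # stand-in adjacency matrix
x = th.rand(4, 1)
assert th.allclose(aggregate_dense(A, x, 3)[-1], th.matrix_power(A, 4) @ x)
```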
"""
By Minjie
"""
from __future__ import division
import math
import numpy as np
import scipy.sparse as sp
import networkx as nx
import matplotlib.pyplot as plt
class SSBM:
def __init__(self, n, k, a=10.0, b=2.0, regime='constant', rng=None):
"""Symmetric Stochastic Block Model.
n - number of nodes
k - number of communities
a - probability scale for intra-community edge
b - probability scale for inter-community edge
        regime - If "logarithm", this generates SSBM(n, k, a*log(n)/n, b*log(n)/n)
If "constant", this generates SSBM(n, k, a/n, b/n)
If "mixed", this generates SSBM(n, k, a*log(n)/n, b/n)
"""
self.n = n
self.k = k
if regime == 'logarithm':
if math.sqrt(a) - math.sqrt(b) >= math.sqrt(k):
print('SSBM model with possible exact recovery.')
else:
print('SSBM model with impossible exact recovery.')
self.a = a * math.log(n) / n
self.b = b * math.log(n) / n
elif regime == 'constant':
snr = (a - b) ** 2 / (k * (a + (k - 1) * b))
if snr > 1:
print('SSBM model with possible detection.')
else:
print('SSBM model that may not have detection (snr=%.5f).' % snr)
self.a = a / n
self.b = b / n
elif regime == 'mixed':
self.a = a * math.log(n) / n
self.b = b / n
else:
raise ValueError('Unknown regime: %s' % regime)
if rng is None:
self.rng = np.random.RandomState()
else:
self.rng = rng
self._graph = None
def generate(self):
self.generate_communities()
print('Finished generating communities.')
self.generate_edges()
print('Finished generating edges.')
def generate_communities(self):
nodes = list(range(self.n))
size = self.n // self.k
self.block_size = size
self.comm2node = [nodes[i*size:(i+1)*size] for i in range(self.k)]
self.node2comm = [nid // size for nid in range(self.n)]
def generate_edges(self):
# TODO: dedup edges
us = []
vs = []
# generate intra-comm edges
for i in range(self.k):
sp_mat = sp.random(self.block_size, self.block_size,
density=self.a,
random_state=self.rng,
data_rvs=lambda l: np.ones(l))
u = sp_mat.row + i * self.block_size
v = sp_mat.col + i * self.block_size
us.append(u)
vs.append(v)
# generate inter-comm edges
for i in range(self.k):
for j in range(self.k):
if i == j:
continue
sp_mat = sp.random(self.block_size, self.block_size,
density=self.b,
random_state=self.rng,
data_rvs=lambda l: np.ones(l))
u = sp_mat.row + i * self.block_size
v = sp_mat.col + j * self.block_size
us.append(u)
vs.append(v)
us = np.hstack(us)
vs = np.hstack(vs)
self.sp_mat = sp.coo_matrix((np.ones(us.shape[0]), (us, vs)), shape=(self.n, self.n))
@property
def graph(self):
if self._graph is None:
self._graph = nx.from_scipy_sparse_matrix(self.sp_mat, create_using=nx.DiGraph())
return self._graph
def plot(self):
x = self.sp_mat.row
y = self.sp_mat.col
plt.scatter(x, y, s=0.5, marker='.', c='k')
plt.savefig('ssbm-%d-%d.pdf' % (self.n, self.k))
plt.clf()
# plot out degree distribution
        out_degree = [d for _, d in self.graph.out_degree()]
        plt.hist(out_degree, 100, density=True)
plt.savefig('ssbm-%d-%d_out_degree.pdf' % (self.n, self.k))
plt.clf()
if __name__ == '__main__':
n = 1000
k = 10
ssbm = SSBM(n, k, regime='mixed', a=4, b=1)
ssbm.generate()
g = ssbm.graph
print('#nodes:', g.number_of_nodes())
print('#edges:', g.number_of_edges())
#ssbm.plot()
#lg = nx.line_graph(g)
# plot degree distribution
#degree = [d for _, d in lg.degree().items()]
#plt.hist(degree, 100, normed=True)
#plt.savefig('lg<ssbm-%d-%d>_degree.pdf' % (n, k))
#plt.clf()
"""
ipython3 test.py -- --features 1 16 16 --gpu -1 --n-classes 5 --n-iterations 10 --n-nodes 10 --order 3 --radius 3
"""
import argparse
import networkx as nx
import torch as th
import torch.nn as nn
import torch.optim as optim
import gnn
parser = argparse.ArgumentParser()
parser.add_argument('--features', nargs='+', type=int)
parser.add_argument('--gpu', type=int)
parser.add_argument('--n-classes', type=int)
parser.add_argument('--n-iterations', type=int)
parser.add_argument('--n-nodes', type=int)
parser.add_argument('--order', type=int)
parser.add_argument('--radius', type=int)
args = parser.parse_args()
if args.gpu < 0:
cuda = False
else:
cuda = True
th.cuda.set_device(args.gpu)
g = nx.barabasi_albert_graph(args.n_nodes, 1).to_directed() # TODO SBM
y = th.multinomial(th.ones(args.n_classes), args.n_nodes, replacement=True)
network = gnn.GNN(args.features, args.order, args.radius, args.n_classes)
if cuda:
    network.cuda()
    y = y.cuda()  # keep labels on the same device as the model output
ce = nn.CrossEntropyLoss()
adam = optim.Adam(network.parameters())
for i in range(args.n_iterations):
y_bar = network(g)
loss = ce(y_bar, y)
adam.zero_grad()
loss.backward()
adam.step()
    print('[iteration %d] loss %f' % (i, loss.item()))
from __future__ import division
import argparse
from itertools import permutations
import networkx as nx
import torch as th
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import dgl
from dgl.data import SBMMixture
import gnn
import utils
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int,
help='Batch size', default=1)
parser.add_argument('--gpu', type=int,
help='GPU', default=-1)
parser.add_argument('--n-communities', type=int,
help='Number of communities', default=2)
parser.add_argument('--n-features', type=int,
help='Number of features per layer', default=2)
parser.add_argument('--n-graphs', type=int,
help='Number of graphs', default=6000)
parser.add_argument('--n-iterations', type=int,
help='Number of iterations', default=10000)
parser.add_argument('--n-layers', type=int,
help='Number of layers', default=30)
parser.add_argument('--n-nodes', type=int,
help='Number of nodes', default=1000)
parser.add_argument('--model-path', type=str,
help='Path to the checkpoint of model', default='model')
parser.add_argument('--radius', type=int,
help='Radius', default=3)
args = parser.parse_args()
dev = th.device('cpu') if args.gpu < 0 else th.device('cuda:%d' % args.gpu)
dataset = SBMMixture(args.n_graphs, args.n_nodes, args.n_communities)
loader = utils.cycle(DataLoader(dataset, args.batch_size,
shuffle=True, collate_fn=dataset.collate_fn, drop_last=True))
ones = th.ones(args.n_nodes // args.n_communities)
y_list = [th.cat([th.cat([x * ones for x in p])] * args.batch_size).long().to(dev)
for p in permutations(range(args.n_communities))]
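# Community labels are identifiable only up to permutation, so keep one label
# vector per permutation of the communities; the training loss below takes the
# minimum cross-entropy over all of them.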
feats = [1] + [args.n_features] * args.n_layers + [args.n_communities]
model = gnn.GNN(feats, args.radius, args.n_communities).to(dev)
opt = optim.Adamax(model.parameters(), lr=0.04)
for i in range(args.n_iterations):
g, lg, deg_g, deg_lg, eid2nid = next(loader)
deg_g = deg_g.to(dev)
deg_lg = deg_lg.to(dev)
eid2nid = eid2nid.to(dev)
y_bar = model(g, lg, deg_g, deg_lg, eid2nid)
loss = min(F.cross_entropy(y_bar, y) for y in y_list)
opt.zero_grad()
loss.backward()
opt.step()
    print('[iteration %0*d] loss %f' % (len(str(args.n_iterations)), i, loss.item()))
th.save(model.state_dict(), args.model_path)
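# Evaluation sketch (hypothetical; mirrors the permutation-invariant loss above):
# compare argmax predictions against every label permutation and keep the best.
#   pred = y_bar.argmax(1)
#   acc = max(float((pred == y).float().mean()) for y in y_list)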
def cycle(loader):
while True:
for x in loader:
yield x
from __future__ import absolute_import

import ctypes
+import scipy as sp
import torch as th
from torch.utils import dlpack
@@ -28,12 +29,19 @@ tensor = th.tensor
sparse_tensor = th.sparse.FloatTensor
sum = th.sum
max = th.max
+stack = th.stack

def astype(a, ty):
    return a.type(ty)

-def pack(tensors):
-    return th.cat(tensors)
+def asnumpy(a):
+    return a.cpu().numpy()
+
+def from_numpy(np_data):
+    return th.from_numpy(np_data)
+
+def pack(tensors, dim=0):
+    return th.cat(tensors, dim)

def unpack(x, indices_or_sections=1):
    return th.split(x, indices_or_sections)
@@ -44,9 +52,6 @@ def shape(x):
def dtype(x):
    return x.dtype

-def asnumpy(a):
-    return a.cpu().numpy()
-
unique = th.unique

def gather_row(data, row_index):
...
@@ -4,6 +4,7 @@ from __future__ import absolute_import
from . import citation_graph as citegrh
from .tree import *
from .utils import *
+from .sbm import SBMMixture

def register_data_args(parser):
    parser.add_argument("--dataset", type=str, required=True,
...
import math
import os
import pickle
import numpy as np
import numpy.random as npr
import scipy as sp
import scipy.sparse  # ensure the sparse submodule is loaded for sp.sparse below
import networkx as nx
from torch.utils.data import Dataset
from .. import backend as F
from ..batched_graph import batch
from ..graph import DGLGraph
from ..utils import Index
def sbm(n_blocks, block_size, p, q, rng=None):
""" (Symmetric) Stochastic Block Model
Parameters
----------
n_blocks : int
Number of blocks.
block_size : int
Block size.
    p : float
        Probability scale for intra-community edges; divided by the total
        number of nodes below, so the edge probability is p / n.
    q : float
        Probability scale for inter-community edges; divided by the total
        number of nodes below, so the edge probability is q / n.
Returns
-------
scipy sparse matrix
The adjacency matrix of generated graph.
"""
n = n_blocks * block_size
p /= n
q /= n
rng = np.random.RandomState() if rng is None else rng
rows = []
cols = []
for i in range(n_blocks):
for j in range(i, n_blocks):
density = p if i == j else q
block = sp.sparse.random(block_size, block_size, density,
random_state=rng, data_rvs=lambda n: np.ones(n))
rows.append(block.row + i * block_size)
cols.append(block.col + j * block_size)
rows = np.hstack(rows)
cols = np.hstack(cols)
a = sp.sparse.coo_matrix((np.ones(rows.shape[0]), (rows, cols)), shape=(n, n))
adj = sp.sparse.triu(a) + sp.sparse.triu(a, 1).transpose()
return adj
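# Example: sbm(2, 500, 10.0, 2.0) draws a 1000-node graph with two 500-node
# blocks; after the division by n above, the intra- and inter-block edge
# probabilities become 10/1000 and 2/1000.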
class SBMMixture(Dataset):
""" Symmetric Stochastic Block Model Mixture
Please refer to Appendix C of "Supervised Community Detection with Hierarchical Graph Neural Networks" (https://arxiv.org/abs/1705.08415) for details.
Parameters
----------
n_graphs : int
Number of graphs.
n_nodes : int
Number of nodes.
n_communities : int
Number of communities.
    k : int, optional
        Multiplier in the Appendix C density generator (p = k * avg_deg - q).
avg_deg : int, optional
Average degree.
p : callable or str, optional
Random density generator.
rng : numpy.random.RandomState, optional
Random number generator.
"""
def __init__(self, n_graphs, n_nodes, n_communities,
k=2, avg_deg=3, p='Appendix C', rng=None):
super(SBMMixture, self).__init__()
self._n_nodes = n_nodes
assert n_nodes % n_communities == 0
block_size = n_nodes // n_communities
if type(p) is str:
p = {'Appendix C' : self._appendix_c}[p]
self._k = k
self._avg_deg = avg_deg
self._gs = [DGLGraph() for i in range(n_graphs)]
adjs = [sbm(n_communities, block_size, *p()) for i in range(n_graphs)]
for g, adj in zip(self._gs, adjs):
g.from_scipy_sparse_matrix(adj)
self._lgs = [g.line_graph() for g in self._gs]
in_degrees = lambda g: g.in_degrees(Index(F.arange(g.number_of_nodes(),
dtype=F.int64))).unsqueeze(1).float()
self._g_degs = [in_degrees(g) for g in self._gs]
self._lg_degs = [in_degrees(lg) for lg in self._lgs]
self._eid2nids = list(zip(*[g.edges(sorted=True) for g in self._gs]))[0]
def __len__(self):
return len(self._gs)
def __getitem__(self, idx):
return self._gs[idx], self._lgs[idx], \
self._g_degs[idx], self._lg_degs[idx], self._eid2nids[idx]
def _appendix_c(self):
q = npr.uniform(0, self._avg_deg - math.sqrt(self._avg_deg))
p = self._k * self._avg_deg - q
return p, q
def collate_fn(self, x):
g, lg, deg_g, deg_lg, eid2nid = zip(*x)
g_batch = batch(g)
lg_batch = batch(lg)
degg_batch = F.pack(deg_g)
deglg_batch = F.pack(deg_lg)
eid2nid_batch = F.pack([x + i * self._n_nodes for i, x in enumerate(eid2nid)])
return g_batch, lg_batch, degg_batch, deglg_batch, eid2nid_batch
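# Usage sketch (hypothetical): each item bundles a graph, its line graph, both
# degree vectors, and the edge-to-source-node map consumed by the model, e.g.
#   dataset = SBMMixture(n_graphs=16, n_nodes=1000, n_communities=2)
#   g, lg, deg_g, deg_lg, eid2nid = dataset[0]
#   batch = dataset.collate_fn([dataset[i] for i in range(4)])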
@@ -466,6 +466,18 @@ class DGLGraph(object):
        for attr in edge_attrs:
            self._edge_frame[attr] = _batcher(attr_dict[attr])

+    def from_scipy_sparse_matrix(self, a):
+        """Convert from scipy sparse matrix.
+
+        Parameters
+        ----------
+        a : scipy sparse matrix
+            The graph's adjacency matrix
+        """
+        self.clear()
+        self._graph.from_scipy_sparse_matrix(a)
+        self._msg_graph.add_nodes(self._graph.number_of_nodes())
+
    def node_attr_schemes(self):
        """Return the node attribute schemes.
@@ -1220,3 +1232,56 @@ class DGLGraph(object):
            [sg._parent_eid for sg in to_merge],
            self._edge_frame.num_rows,
            reduce_func)
+
+    def adjacency_matrix(self, ctx=None):
+        """Return the adjacency matrix representation of this graph.
+
+        Parameters
+        ----------
+        ctx : optional
+            The context of the returned adjacency matrix.
+
+        Returns
+        -------
+        sparse_tensor
+            The adjacency matrix.
+        """
+        return self._graph.adjacency_matrix().get(ctx)
+
+    def incidence_matrix(self, oriented=False, ctx=None):
+        """Return the incidence matrix representation of this graph.
+
+        Parameters
+        ----------
+        oriented : bool, optional
+            Whether the returned incidence matrix is oriented.
+        ctx : optional
+            The context of the returned incidence matrix.
+
+        Returns
+        -------
+        sparse_tensor
+            The incidence matrix.
+        """
+        return self._graph.incidence_matrix(oriented).get(ctx)
+
+    def line_graph(self, backtracking=True, shared=False):
+        """Return the line graph of this graph.
+
+        Parameters
+        ----------
+        backtracking : bool, optional
+            Whether the returned line graph is backtracking.
+        shared : bool, optional
+            Whether the returned line graph shares representations with `self`.
+
+        Returns
+        -------
+        DGLGraph
+            The line graph of this graph.
+        """
+        graph_data = self._graph.line_graph(backtracking)
+        node_frame = self._edge_frame if shared else None
+        return DGLGraph(graph_data, node_frame)
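Together these additions expose the pipeline the model needs: build a graph, take its (optionally non-backtracking) line graph, and fetch sparse adjacency/incidence views. A minimal sketch using only the calls added above plus the constructor used in the tests below (a sketch, not part of the patch):

```python
import networkx as nx
import dgl

g = dgl.DGLGraph(nx.star_graph(5))       # undirected star -> 10 directed edges
lg = g.line_graph(backtracking=False)    # one line-graph node per edge of g
assert lg.number_of_nodes() == g.number_of_edges()

adj = g.adjacency_matrix()               # sparse adjacency tensor
inc = g.incidence_matrix(oriented=True)  # sparse oriented incidence tensor
```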
@@ -3,6 +3,7 @@ from __future__ import absolute_import
import ctypes
import numpy as np
import networkx as nx
+import scipy.sparse as sp

from ._ffi.base import c_array
from ._ffi.function import _init_api
@@ -407,6 +408,47 @@ class GraphIndex(object):
        self._cache['adj'] = utils.CtxCachedObject(lambda ctx: F.to_context(mat, ctx))
        return self._cache['adj']

+    def incidence_matrix(self, oriented=False):
+        """Return the incidence matrix representation of this graph.
+
+        Parameters
+        ----------
+        oriented : bool, optional (default=False)
+            Whether the returned incidence matrix is oriented.
+
+        Returns
+        -------
+        utils.CtxCachedObject
+            An object that returns a tensor given a context.
+        """
+        key = ('oriented ' if oriented else '') + 'incidence matrix'
+        if key not in self._cache:
+            src, dst, _ = self.edges(sorted=False)
+            src = src.tousertensor()
+            dst = dst.tousertensor()
+            m = self.number_of_edges()
+            eid = F.arange(m, dtype=F.int64)
+            row = F.pack([src, dst])
+            col = F.pack([eid, eid])
+            idx = F.stack([row, col])
+            diagonal = (src == dst)
+            if oriented:
+                x = -F.ones((m,))
+                y = F.ones((m,))
+                x[diagonal] = 0
+                y[diagonal] = 0
+                dat = F.pack([x, y])
+            else:
+                x = F.ones((m,))
+                x[diagonal] = 0
+                dat = F.pack([x, x])
+            n = self.number_of_nodes()
+            mat = F.sparse_tensor(idx, dat, [n, m])
+            self._cache[key] = utils.CtxCachedObject(lambda ctx: F.to_context(mat, ctx))
+        return self._cache[key]
+
    def to_networkx(self):
        """Convert to networkx graph.
@@ -459,6 +501,37 @@ class GraphIndex(object):
        dst = utils.toindex(dst)
        self.add_edges(src, dst)

+    def from_scipy_sparse_matrix(self, adj):
+        """Convert from scipy sparse matrix.
+
+        Parameters
+        ----------
+        adj : scipy sparse matrix
+        """
+        self.clear()
+        self.add_nodes(adj.shape[0])
+        adj_coo = adj.tocoo()
+        src = utils.toindex(adj_coo.row)
+        dst = utils.toindex(adj_coo.col)
+        self.add_edges(src, dst)
+
+    def line_graph(self, backtracking=True):
+        """Return the line graph of this graph.
+
+        Parameters
+        ----------
+        backtracking : bool, optional (default=True)
+            Whether (i, j) ~ (j, i) in L(G).
+            (i, j) ~ (j, i) is the behavior of networkx.line_graph.
+
+        Returns
+        -------
+        GraphIndex
+            The line graph of this graph.
+        """
+        handle = _CAPI_DGLGraphLineGraph(self._handle, backtracking)
+        return GraphIndex(handle)
+
def disjoint_union(graphs):
    """Return a disjoint union of the input graphs.
...
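For intuition, the construction above scatters the edge endpoints into a sparse n x m matrix: in the oriented case, entry (src, eid) gets -1 and (dst, eid) gets +1, with self-loop columns zeroed. A standalone PyTorch sketch of the same assembly (toy edge list; hypothetical, no DGL backend involved):

```python
import torch as th

src = th.tensor([0, 1, 2])             # toy edges: 0->1, 1->2, 2->2 (self-loop)
dst = th.tensor([1, 2, 2])
m, n = 3, 3                            # edges, nodes
eid = th.arange(m)
idx = th.stack([th.cat([src, dst]), th.cat([eid, eid])])
x = -th.ones(m)                        # -1 at (src, eid)
y = th.ones(m)                         # +1 at (dst, eid)
x[src == dst] = 0                      # self-loops contribute nothing
y[src == dst] = 0
inc = th.sparse.FloatTensor(idx, th.cat([x, y]), th.Size([n, m]))
print(inc.to_dense())
```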
@@ -325,4 +325,15 @@ TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLDisjointPartitionBySizes")
    }
    *rv = ptr_array;
  });

+TVM_REGISTER_GLOBAL("graph_index._CAPI_DGLGraphLineGraph")
+.set_body([] (TVMArgs args, TVMRetValue* rv) {
+    GraphHandle ghandle = args[0];
+    bool backtracking = args[1];
+    const Graph* gptr = static_cast<Graph*>(ghandle);
+    Graph* lgptr = new Graph();
+    *lgptr = GraphOp::LineGraph(gptr, backtracking);
+    GraphHandle lghandle = lgptr;
+    *rv = lghandle;
+  });
+
}  // namespace dgl
@@ -4,6 +4,48 @@
namespace dgl {

+Graph GraphOp::LineGraph(const Graph* g, bool backtracking) {
+  typedef std::pair<dgl_id_t, dgl_id_t> entry;
+  typedef std::map<dgl_id_t, std::vector<entry>> csm;  // compressed sparse matrix
+  csm adj;
+  std::vector<entry> vec;
+  for (size_t i = 0; i != g->all_edges_src_.size(); ++i) {
+    auto u = g->all_edges_src_[i];
+    auto v = g->all_edges_dst_[i];
+    auto ret = adj.insert(csm::value_type(u, vec));
+    (ret.first)->second.push_back(std::make_pair(v, i));
+  }
+
+  std::vector<dgl_id_t> lg_src, lg_dst;
+  for (size_t i = 0; i != g->all_edges_src_.size(); ++i) {
+    auto u = g->all_edges_src_[i];
+    auto v = g->all_edges_dst_[i];
+    auto j = adj.find(v);
+    if (j != adj.end()) {
+      for (size_t k = 0; k != j->second.size(); ++k) {
+        if (backtracking || (!backtracking && j->second[k].first != u)) {
+          lg_src.push_back(i);
+          lg_dst.push_back(j->second[k].second);
+        }
+      }
+    }
+  }
+
+  const int64_t len = lg_src.size();
+  IdArray src = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  IdArray dst = IdArray::Empty({len}, DLDataType{kDLInt, 64, 1}, DLContext{kDLCPU, 0});
+  int64_t* src_ptr = static_cast<int64_t*>(src->data);
+  int64_t* dst_ptr = static_cast<int64_t*>(dst->data);
+  std::copy(lg_src.begin(), lg_src.end(), src_ptr);
+  std::copy(lg_dst.begin(), lg_dst.end(), dst_ptr);
+
+  Graph lg;
+  lg.AddVertices(g->NumEdges());
+  lg.AddEdges(src, dst);
+  return lg;
+}
+
Graph GraphOp::DisjointUnion(std::vector<const Graph*> graphs) {
  Graph rst;
  uint64_t cumsum = 0;
...
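The C++ construction above is the standard directed line graph: edge i = (u, v) points to every edge j = (v, w) leaving its destination, and with backtracking disabled the reverse edge (v, u) is skipped. A Python mirror of the same two-pass loop (hypothetical helper, not part of the patch):

```python
def line_graph_edges(src, dst, backtracking=True):
    """Edge i -> edge j in L(G) iff dst[i] == src[j], optionally skipping reversals."""
    adj = {}                                     # source node -> [(dest node, edge id)]
    for i, (u, v) in enumerate(zip(src, dst)):
        adj.setdefault(u, []).append((v, i))
    lg_src, lg_dst = [], []
    for i, (u, v) in enumerate(zip(src, dst)):
        for w, j in adj.get(v, []):
            if backtracking or w != u:           # drop j = reverse of i if requested
                lg_src.append(i)
                lg_dst.append(j)
    return lg_src, lg_dst

# A reciprocal pair alone yields no non-backtracking line-graph edges:
# line_graph_edges([0, 1], [1, 0], backtracking=False) -> ([], [])
```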
@@ -9,20 +9,18 @@ def check_eq(a, b):
    return a.shape == b.shape and np.allclose(a.numpy(), b.numpy())

def test_line_graph():
-    # FIXME
-    return
-    """
    N = 5
    G = dgl.DGLGraph(nx.star_graph(N))
-    G.set_e_repr(th.randn((2*N, D)))
+    G.set_e_repr(th.randn((2 * N, D)))
    n_edges = G.number_of_edges()
-    L = dgl.line_graph(G)
-    assert L.number_of_nodes() == 2*N
+    L = G.line_graph(shared=True)
+    assert L.number_of_nodes() == 2 * N
+    L.set_n_repr(th.randn((2 * N, D)))
    # Updating node features on the line graph should be reflected in the
    # edge features of the original graph.
    u = [0, 0, 2, 3]
    v = [1, 2, 0, 0]
-    eid = G.get_edge_id(u, v)
+    eid = G.edge_ids(u, v)
    L.set_n_repr(th.zeros((4, D)), eid)
    assert check_eq(G.get_e_repr(u, v), th.zeros((4, D)))
@@ -31,23 +29,18 @@ def test_line_graph():
    data = th.randn(n_edges, D)
    L.set_n_repr({'w': data})
    assert check_eq(G.get_e_repr()['w'], data)
-    """

def test_no_backtracking():
-    # FIXME
-    return
-    """
    N = 5
    G = dgl.DGLGraph(nx.star_graph(N))
-    G.set_e_repr(th.randn((2*N, D)))
-    L = dgl.line_graph(G, no_backtracking=True)
-    assert L.number_of_nodes() == 2*N
+    G.set_e_repr(th.randn((2 * N, D)))
+    L = G.line_graph(backtracking=False)
+    assert L.number_of_nodes() == 2 * N
    for i in range(1, N):
-        e1 = G.get_edge_id(0, i)
-        e2 = G.get_edge_id(i, 0)
+        e1 = G.edge_id(0, i)
+        e2 = G.edge_id(i, 0)
        assert not L.has_edge(e1, e2)
        assert not L.has_edge(e2, e1)
-    """

if __name__ == '__main__':
    test_line_graph()
...