Unverified Commit 49c4a9e4 authored by Chao Ma, committed by GitHub

[DEMO] Remove duplicate code for sampling (#557)

* update

* update

* re-use single-machine code

* update

* use relative path

* update

* update

* update

* add __init__.py

* add __init__.py

* import sys, os

* fix typo

* update
parent 28379f92
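
The refactor below applies the same pattern to each distributed demo: delete the locally duplicated model classes and import them from the single-machine script instead. A minimal sketch of that pattern, assuming the distributed demo lives one directory below the single-machine scripts (this is exactly what the hunks below introduce):

    import os, sys

    # make the parent directory (where the single-machine demos live) importable
    parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.insert(0, parentdir)

    # re-use the single-machine model definitions instead of duplicating them
    from gcn_cv_sc import NodeUpdate, GCNSampling, GCNInfer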
import os, sys
import argparse, time, math
import numpy as np
import mxnet as mx
@@ -6,137 +7,9 @@ import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
class NodeUpdate(gluon.Block):
def __init__(self, layer_id, in_feats, out_feats, dropout, activation=None, test=False, concat=False):
super(NodeUpdate, self).__init__()
self.layer_id = layer_id
self.dropout = dropout
self.test = test
self.concat = concat
with self.name_scope():
self.dense = gluon.nn.Dense(out_feats, in_units=in_feats)
self.activation = activation
def forward(self, node):
h = node.data['h']
norm = node.data['norm']
if self.test:
h = h * norm
else:
agg_history_str = 'agg_h_{}'.format(self.layer_id-1)
agg_history = node.data[agg_history_str]
subg_norm = node.data['subg_norm']
# control variate
h = h * subg_norm + agg_history * norm
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = self.dense(h)
if self.concat:
h = mx.nd.concat(h, self.activation(h))
elif self.activation:
h = self.activation(h)
return {'activation': h}
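# Editor's note (hedged): in the non-test branch above, 'h' holds the aggregate
# over *sampled* neighbors only. Rescaling it by subg_norm (1/sampled degree)
# and adding agg_history * norm (the cached aggregate of stale activations over
# the full neighborhood) gives a lower-variance estimate of the full-graph
# aggregation, per the control-variate sampling scheme.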
class GCNSampling(gluon.Block):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
**kwargs):
super(GCNSampling, self).__init__(**kwargs)
self.dropout = dropout
self.n_layers = n_layers
with self.name_scope():
self.layers = gluon.nn.Sequential()
# input layer
self.dense = gluon.nn.Dense(n_hidden, in_units=in_feats)
self.activation = activation
# hidden layers
for i in range(1, n_layers):
skip_start = (i == self.n_layers-1)
self.layers.add(NodeUpdate(i, n_hidden, n_hidden, dropout, activation, concat=skip_start))
# output layer
self.layers.add(NodeUpdate(n_layers, 2*n_hidden, n_classes, dropout))
def forward(self, nf):
h = nf.layers[0].data['preprocess']
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = self.dense(h)
skip_start = (0 == self.n_layers-1)
if skip_start:
h = mx.nd.concat(h, self.activation(h))
else:
h = self.activation(h)
for i, layer in enumerate(self.layers):
new_history = h.copy().detach()
history_str = 'h_{}'.format(i)
history = nf.layers[i].data[history_str]
h = h - history
nf.layers[i].data['h'] = h
nf.block_compute(i,
fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'),
layer)
h = nf.layers[i+1].data.pop('activation')
# update history
if i < nf.num_layers-1:
nf.layers[i].data[history_str] = new_history
return h
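# Editor's note (hedged): the loop above propagates only delta_h = h - history
# through block_compute; the matching history aggregate is added back inside
# NodeUpdate. new_history (copied and detached before the subtraction) then
# replaces the stored history, so later mini-batches see fresher cached
# activations.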
class GCNInfer(gluon.Block):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
**kwargs):
super(GCNInfer, self).__init__(**kwargs)
self.n_layers = n_layers
with self.name_scope():
self.layers = gluon.nn.Sequential()
# input layer
self.dense = gluon.nn.Dense(n_hidden, in_units=in_feats)
self.activation = activation
# hidden layers
for i in range(1, n_layers):
skip_start = (i == self.n_layers-1)
self.layers.add(NodeUpdate(i, n_hidden, n_hidden, 0, activation, True, concat=skip_start))
# output layer
self.layers.add(NodeUpdate(n_layers, 2*n_hidden, n_classes, 0, None, True))
def forward(self, nf):
h = nf.layers[0].data['preprocess']
h = self.dense(h)
skip_start = (0 == self.n_layers-1)
if skip_start:
h = mx.nd.concat(h, self.activation(h))
else:
h = self.activation(h)
for i, layer in enumerate(self.layers):
nf.layers[i].data['h'] = h
nf.block_compute(i,
fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'),
layer)
h = nf.layers[i+1].data.pop('activation')
return h
parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parentdir)
from gcn_cv_sc import NodeUpdate, GCNSampling, GCNInfer
def gcn_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples, distributed):
......
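
For readers skimming the diff, a self-contained NumPy sketch of the control-variate combination that NodeUpdate applies above; all array values here are hypothetical toy data:

    import numpy as np

    n_nodes, n_feats = 4, 3
    h_sampled = np.random.rand(n_nodes, n_feats)    # aggregate over sampled neighbors
    agg_history = np.random.rand(n_nodes, n_feats)  # cached aggregate of stale activations
    subg_norm = 1.0 / np.array([[2.0], [1.0], [2.0], [3.0]])  # 1 / sampled in-degree
    norm = 1.0 / np.array([[5.0], [4.0], [7.0], [6.0]])       # 1 / full in-degree

    # control variate: rescaled sampled part plus the history-based correction
    h_cv = h_sampled * subg_norm + agg_history * norm
    print(h_cv.shape)  # (4, 3)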
import os, sys
import argparse, time, math
import numpy as np
import mxnet as mx
@@ -7,106 +8,9 @@ import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
class NodeUpdate(gluon.Block):
def __init__(self, in_feats, out_feats, activation=None, concat=False):
super(NodeUpdate, self).__init__()
self.dense = gluon.nn.Dense(out_feats, in_units=in_feats)
self.activation = activation
self.concat = concat
def forward(self, node):
h = node.data['h']
h = h * node.data['norm']
h = self.dense(h)
# skip connection
if self.concat:
h = mx.nd.concat(h, self.activation(h))
elif self.activation:
h = self.activation(h)
return {'activation': h}
class GCNSampling(gluon.Block):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
dropout,
**kwargs):
super(GCNSampling, self).__init__(**kwargs)
self.dropout = dropout
self.n_layers = n_layers
with self.name_scope():
self.layers = gluon.nn.Sequential()
# input layer
skip_start = (0 == n_layers-1)
self.layers.add(NodeUpdate(in_feats, n_hidden, activation, concat=skip_start))
# hidden layers
for i in range(1, n_layers):
skip_start = (i == n_layers-1)
self.layers.add(NodeUpdate(n_hidden, n_hidden, activation, concat=skip_start))
# output layer
self.layers.add(NodeUpdate(2*n_hidden, n_classes))
def forward(self, nf):
nf.layers[0].data['activation'] = nf.layers[0].data['features']
for i, layer in enumerate(self.layers):
h = nf.layers[i].data.pop('activation')
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
nf.layers[i].data['h'] = h
degs = nf.layer_in_degree(i + 1).astype('float32').as_in_context(h.context)
nf.layers[i + 1].data['norm'] = mx.nd.expand_dims(1./degs, 1)
nf.block_compute(i,
fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'),
layer)
h = nf.layers[-1].data.pop('activation')
return h
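# Editor's note (hedged): fn.copy_src + fn.sum aggregates the sampled neighbor
# features, and multiplying by 'norm' = 1/in-degree inside NodeUpdate turns
# that sum into a mean over the sampled neighborhood.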
class GCNInfer(gluon.Block):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
activation,
**kwargs):
super(GCNInfer, self).__init__(**kwargs)
self.n_layers = n_layers
with self.name_scope():
self.layers = gluon.nn.Sequential()
# input layer
skip_start = (0 == n_layers-1)
self.layers.add(NodeUpdate(in_feats, n_hidden, activation, concat=skip_start))
# hidden layers
for i in range(1, n_layers):
skip_start = (i == n_layers-1)
self.layers.add(NodeUpdate(n_hidden, n_hidden, activation, concat=skip_start))
# output layer
self.layers.add(NodeUpdate(2*n_hidden, n_classes))
def forward(self, nf):
nf.layers[0].data['activation'] = nf.layers[0].data['features']
for i, layer in enumerate(self.layers):
h = nf.layers[i].data.pop('activation')
nf.layers[i].data['h'] = h
nf.block_compute(i,
fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'),
layer)
return nf.layers[-1].data.pop('activation')
parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parentdir)
from gcn_ns_sc import NodeUpdate, GCNSampling, GCNInfer
def gcn_ns_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples):
......
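
The neighbor-sampling variant above reduces to plain mean aggregation. A minimal NumPy sketch of the copy_src/sum-then-normalize pattern, using a hypothetical 3-node adjacency:

    import numpy as np

    # rows = destination nodes, cols = source nodes (hypothetical toy graph)
    A = np.array([[0, 1, 1],
                  [1, 0, 0],
                  [1, 1, 0]], dtype=float)
    H = np.random.rand(3, 2)                 # node features
    deg = A.sum(axis=1, keepdims=True)       # in-degree per destination node

    # fn.copy_src + fn.sum is the matrix product; h * norm makes it a mean
    H_mean = (A @ H) / deg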
import os, sys
import argparse, time, math
import numpy as np
import mxnet as mx
@@ -7,177 +8,10 @@ import dgl
import dgl.function as fn
from dgl import DGLGraph
from dgl.data import register_data_args, load_data
class GraphSAGELayer(gluon.Block):
def __init__(self,
in_feats,
hidden,
out_feats,
dropout,
last=False,
**kwargs):
super(GraphSAGELayer, self).__init__(**kwargs)
self.last = last
self.dropout = dropout
with self.name_scope():
self.dense1 = gluon.nn.Dense(hidden, in_units=in_feats)
self.layer_norm1 = gluon.nn.LayerNorm(in_channels=hidden)
self.dense2 = gluon.nn.Dense(out_feats, in_units=hidden)
if not self.last:
self.layer_norm2 = gluon.nn.LayerNorm(in_channels=out_feats)
def forward(self, h):
h = self.dense1(h)
h = self.layer_norm1(h)
h = mx.nd.relu(h)
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = self.dense2(h)
if not self.last:
h = self.layer_norm2(h)
h = mx.nd.relu(h)
return h
class NodeUpdate(gluon.Block):
def __init__(self, layer_id, in_feats, out_feats, hidden, dropout,
test=False, last=False):
super(NodeUpdate, self).__init__()
self.layer_id = layer_id
self.dropout = dropout
self.test = test
self.last = last
with self.name_scope():
self.layer = GraphSAGELayer(in_feats, hidden, out_feats, dropout, last)
def forward(self, node):
h = node.data['h']
norm = node.data['norm']
# the node's own activation from the previous layer
self_h = node.data['self_h']
if self.test:
h = (h - self_h) * norm
# graphsage
h = mx.nd.concat(h, self_h)
else:
agg_history_str = 'agg_h_{}'.format(self.layer_id-1)
agg_history = node.data[agg_history_str]
# normalization constant
subg_norm = node.data['subg_norm']
# the node's own delta_h (h - history) from the previous layer
self_delta_h = node.data['self_delta_h']
# control variate
h = (h - self_delta_h) * subg_norm + agg_history * norm
# graphsage
h = mx.nd.concat(h, self_h)
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = self.layer(h)
return {'activation': h}
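# Editor's note (hedged): unlike the GCN variant, GraphSAGE concatenates the
# node's own previous activation (self_h) with the (control-variate corrected)
# neighbor aggregate before the dense layers, which is why every layer's
# in_feats below is twice the hidden size.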
class GraphSAGETrain(gluon.Block):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
dropout,
**kwargs):
super(GraphSAGETrain, self).__init__(**kwargs)
self.dropout = dropout
with self.name_scope():
self.layers = gluon.nn.Sequential()
# input layer
self.input_layer = GraphSAGELayer(2*in_feats, n_hidden, n_hidden, dropout)
# hidden layers
for i in range(1, n_layers):
self.layers.add(NodeUpdate(i, 2*n_hidden, n_hidden, n_hidden, dropout))
# output layer
self.layers.add(NodeUpdate(n_layers, 2*n_hidden, n_classes, n_hidden, dropout, last=True))
def forward(self, nf):
h = nf.layers[0].data['preprocess']
features = nf.layers[0].data['features']
h = mx.nd.concat(h, features)
if self.dropout:
h = mx.nd.Dropout(h, p=self.dropout)
h = self.input_layer(h)
for i, layer in enumerate(self.layers):
parent_nid = dgl.utils.toindex(nf.layer_parent_nid(i+1))
layer_nid = nf.map_from_parent_nid(i, parent_nid).as_in_context(h.context)
self_h = h[layer_nid]
# the node's own previous-layer activation, used by the GraphSAGE concat
nf.layers[i+1].data['self_h'] = self_h
new_history = h.copy().detach()
history_str = 'h_{}'.format(i)
history = nf.layers[i].data[history_str]
# delta_h used in control variate
delta_h = h - history
# previous-layer delta_h of the nodes in layer i+1, used by the control variate
nf.layers[i+1].data['self_delta_h'] = delta_h[layer_nid]
nf.layers[i].data['h'] = delta_h
nf.block_compute(i,
fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'),
layer)
h = nf.layers[i+1].data.pop('activation')
# update history
if i < nf.num_layers-1:
nf.layers[i].data[history_str] = new_history
return h
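# Editor's note (hedged): layer_parent_nid/map_from_parent_nid locate each
# layer-(i+1) node inside layer i, so h[layer_nid] reads that node's own
# previous-layer activation (self_h) and delta (self_delta_h) without an
# extra message pass.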
class GraphSAGEInfer(gluon.Block):
def __init__(self,
in_feats,
n_hidden,
n_classes,
n_layers,
**kwargs):
super(GraphSAGEInfer, self).__init__(**kwargs)
with self.name_scope():
self.layers = gluon.nn.Sequential()
# input layer
self.input_layer = GraphSAGELayer(2*in_feats, n_hidden, n_hidden, 0)
# hidden layers
for i in range(1, n_layers):
self.layers.add(NodeUpdate(i, 2*n_hidden, n_hidden, n_hidden, 0, True))
# output layer
self.layers.add(NodeUpdate(n_layers, 2*n_hidden, n_classes, n_hidden, 0, True, last=True))
def forward(self, nf):
h = nf.layers[0].data['preprocess']
features = nf.layers[0].data['features']
h = mx.nd.concat(h, features)
h = self.input_layer(h)
for i, layer in enumerate(self.layers):
nf.layers[i].data['h'] = h
parent_nid = dgl.utils.toindex(nf.layer_parent_nid(i+1))
layer_nid = nf.map_from_parent_nid(i, parent_nid).as_in_context(h.context)
# previous-layer activation of the nodes in layer i+1, used by the GraphSAGE concat
self_h = h[layer_nid]
nf.layers[i+1].data['self_h'] = self_h
nf.block_compute(i,
fn.copy_src(src='h', out='m'),
fn.sum(msg='m', out='h'),
layer)
h = nf.layers[i+1].data.pop('activation')
return h
parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.insert(0, parentdir)
from graphsage_cv import GraphSAGELayer, NodeUpdate
from graphsage_cv import GraphSAGETrain, GraphSAGEInfer
def graphsage_cv_train(g, ctx, args, n_classes, train_nid, test_nid, n_test_samples, distributed):
n0_feats = g.nodes[0].data['features']
......
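
Putting the GraphSAGE pieces together, a hedged NumPy sketch of one inference-time layer update (weights and data are hypothetical toy values; the real code uses the Dense/LayerNorm stack in GraphSAGELayer):

    import numpy as np

    def relu(x):
        return np.maximum(x, 0.0)

    n, d, hid = 3, 2, 4
    h_self = np.random.rand(n, d)     # each node's current activation
    h_neigh = np.random.rand(n, d)    # mean over sampled neighbors
    W1 = np.random.rand(2 * d, hid)   # stands in for dense1 (in_units = 2*d)

    # concat neighbor aggregate and self view, then transform
    # (LayerNorm and the second dense layer omitted for brevity)
    h = np.concatenate([h_neigh, h_self], axis=1)
    h = relu(h @ W1)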
@@ -233,6 +233,7 @@ def main(args):
num_neighbors,
neighbor_type='in',
shuffle=True,
num_workers=32,
num_hops=n_layers,
seed_nodes=train_nid):
for i in range(n_layers):
@@ -272,6 +273,7 @@ def main(args):
for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
g.number_of_nodes(),
neighbor_type='in',
num_workers=32,
num_hops=n_layers,
seed_nodes=test_nid):
node_embed_names = [['preprocess']]
......
@@ -197,6 +197,7 @@ def main(args):
args.num_neighbors,
neighbor_type='in',
shuffle=True,
num_workers=32,
num_hops=args.n_layers+1,
seed_nodes=train_nid):
nf.copy_from_parent()
@@ -219,6 +220,7 @@ def main(args):
for nf in dgl.contrib.sampling.NeighborSampler(g, args.test_batch_size,
g.number_of_nodes(),
neighbor_type='in',
num_workers=32,
num_hops=args.n_layers+1,
seed_nodes=test_nid):
nf.copy_from_parent()
......
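
Besides the import refactor, the last two files only gain num_workers=32, which parallelizes NodeFlow generation across sampler workers. A usage sketch mirroring the training loops above (g, args, n_layers, and train_nid are assumed from the surrounding scripts):

    for nf in dgl.contrib.sampling.NeighborSampler(g, args.batch_size,
                                                   args.num_neighbors,  # expand factor per hop
                                                   neighbor_type='in',
                                                   shuffle=True,
                                                   num_workers=32,      # the newly added knob
                                                   num_hops=args.n_layers + 1,
                                                   seed_nodes=train_nid):
        nf.copy_from_parent()  # pull node features from the parent graph
        # ... train on this NodeFlow ...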