Unverified Commit 4ef01dbb authored by xiang song(charlie.song), committed by GitHub

[Example] Rgcn support ogbn-mag dataset. (#1812)



* rgcn support ogbn-mag dataset

* upd

* multi-gpu val and test

* Fix

* fix

* Add support for ogbn-mag

* Fix

* Fix

* Fix

* Fix

* Add layer_norm

* update

* Fix merge

* Clean some code

* update Readme

* upd
Co-authored-by: Ubuntu <ubuntu@ip-172-31-68-185.ec2.internal>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-87-240.ec2.internal>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-51-214.ec2.internal>
parent e7515773
......@@ -40,7 +40,7 @@ python3 entity_classify.py -d am --n-bases=40 --n-hidden=10 --l2norm=5e-4 --test
### Entity Classification with minibatch
AIFB: accuracy avg(5 runs) 90.56%, best 94.44% (DGL)
```
python3 entity_classify_mp.py -d aifb --testing --gpu 0 --fanout=20 --batch-size 128
python3 entity_classify_mp.py -d aifb --testing --gpu 0 --fanout='20,20' --batch-size 128
```
MUTAG: accuracy avg(5 runs) 66.77%, best 69.12% (DGL)
......@@ -49,16 +49,30 @@ python3 entity_classify_mp.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gp
```
BGS: accuracy avg(5 runs) 91.72%, best 96.55% (DGL)
```
python3 entity_classify_mp.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout 40 --n-epochs=40 --batch-size=128
python3 entity_classify_mp.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout '40,40' --n-epochs=40 --batch-size=128
```
AM: accuracy avg(5 runs) 88.28%, best 90.40% (DGL)
```
python3 entity_classify_mp.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout 35 --batch-size 256 --lr 1e-2 --n-hidden 16 --use-self-loop --n-epochs=40
python3 entity_classify_mp.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout '35,35' --batch-size 256 --lr 1e-2 --n-hidden 16 --use-self-loop --n-epochs=40
```
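Note that the minibatch commands above now pass `--fanout` as a comma-separated list with one value per layer (e.g. `'20,20'` for a two-layer model) rather than a single integer. As a rough sketch (the exact parsing in `entity_classify_mp.py` may differ), such a string can be turned into a per-layer DGL neighbor sampler like this:
```
import dgl

# Hypothetical parsing of a per-layer fanout string such as '20,20'.
fanouts = [int(f) for f in '20,20'.split(',')]   # one fanout per RGCN layer

# MultiLayerNeighborSampler samples fanouts[i] neighbors at layer i.
sampler = dgl.dataloading.MultiLayerNeighborSampler(fanouts)
```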
### Entity Classification on OGBN-MAG
Test bed: P3-8xlarge
OGBN-MAG accuracy 46.22
```
python3 entity_classify_mp.py -d ogbn-mag --testing --fanout='25,30' --batch-size 512 --n-hidden 64 --lr 0.01 --num-worker 0 --eval-batch-size 8 --low-mem --gpu 0,1,2,3,4,5,6,7 --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --mix-cpu-gpu --node-feats --layer-norm
```
OGBN-MAG without node-feats: accuracy 43.24
```
python3 entity_classify_mp.py -d ogbn-mag --testing --fanout='25,25' --batch-size 256 --n-hidden 64 --lr 0.01 --num-worker 0 --eval-batch-size 8 --low-mem --gpu 0,1,2,3,4,5,6,7 --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --mix-cpu-gpu --layer-norm
```
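For reference, ogbn-mag is a heterogeneous citation graph distributed through the OGB package. A minimal sketch of loading it with OGB's DGL wrapper is shown below; the split handling inside `entity_classify_mp.py` may differ.
```
from ogb.nodeproppred import DglNodePropPredDataset

# Download (on first use) and load ogbn-mag as a DGL heterograph.
dataset = DglNodePropPredDataset(name='ogbn-mag')
graph, labels = dataset[0]            # labels is a dict keyed by node type ('paper')
split_idx = dataset.get_idx_split()   # train/valid/test indices, also per node type
train_idx = split_idx['train']['paper']
```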
Test bed: P2-8xlarge
### Link Prediction
FB15k-237: MRR 0.151 (DGL), 0.158 (paper)
```
......
This diff is collapsed.
......@@ -61,7 +61,7 @@ class RelGraphEmbedLayer(nn.Module):
num_of_ntype : int
Number of node types
input_size : list of int
A list of input feature sizes for each node type. If an entry is None, the
input feature of the corresponding node type is treated as a one-hot encoding feature.
embed_size : int
Output embed size
......@@ -91,16 +91,15 @@ class RelGraphEmbedLayer(nn.Module):
for ntype in range(num_of_ntype):
if input_size[ntype] is not None:
loc = node_tids == ntype
input_emb_size = node_tids[loc].shape[0]
input_emb_size = input_size[ntype].shape[1]
embed = nn.Parameter(th.Tensor(input_emb_size, self.embed_size))
nn.init.xavier_uniform_(embed, gain=nn.init.calculate_gain('relu'))
nn.init.xavier_uniform_(embed)
self.embeds[str(ntype)] = embed
self.node_embeds = th.nn.Embedding(node_tids.shape[0], self.embed_size, sparse=self.sparse_emb)
nn.init.uniform_(self.node_embeds.weight, -1.0, 1.0)
def forward(self, node_ids, node_tids, features):
def forward(self, node_ids, node_tids, type_ids, features):
"""Forward computation
Parameters
----------
......@@ -111,19 +110,21 @@ class RelGraphEmbedLayer(nn.Module):
features : list of features
list of initial features for nodes belonging to different node types.
If an entry is None, the corresponding feature is a one-hot encoding feature;
otherwise the features are used directly as input and multiplied by a
projection matrix.
Returns
-------
tensor
embeddings as the input of the next layer
"""
tsd_idx = node_ids < self.num_nodes
tsd_ids = node_ids[tsd_idx]
embeds = self.node_embeds(tsd_ids)
tsd_ids = node_ids.to(self.node_embeds.weight.device)
embeds = th.empty(node_ids.shape[0], self.embed_size, device=self.dev_id)
for ntype in range(self.num_of_ntype):
if features[ntype] is not None:
loc = node_tids == ntype
embeds[loc] = features[ntype] @ self.embeds[str(ntype)]
embeds[loc] = features[ntype][type_ids[loc]].to(self.dev_id) @ self.embeds[str(ntype)].to(self.dev_id)
else:
loc = node_tids == ntype
embeds[loc] = self.node_embeds(tsd_ids[loc]).to(self.dev_id)
return embeds.to(self.dev_id)
return embeds
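The reworked `forward` above builds the embedding tensor type by type: node types that carry raw features are projected through a learned per-type matrix indexed by `type_ids`, while featureless types fall back to a learnable `nn.Embedding` lookup. A condensed, single-device sketch of that pattern follows; the helper name `embed_nodes` and the example sizes are hypothetical, not the exact DGL example code.
```
import torch as th
import torch.nn as nn

def embed_nodes(node_ids, node_tids, type_ids, features,
                per_type_proj, node_embeds, embed_size):
    """Illustrative per-type embedding lookup (simplified sketch)."""
    embeds = th.empty(node_ids.shape[0], embed_size)
    for ntype, feat in enumerate(features):
        loc = node_tids == ntype
        if feat is not None:
            # node types with raw features: project them into the shared embedding space
            embeds[loc] = feat[type_ids[loc]] @ per_type_proj[str(ntype)]
        else:
            # featureless node types: learnable embedding indexed by node id
            embeds[loc] = node_embeds(node_ids[loc])
    return embeds

# Hypothetical wiring: 2 node types, type 0 has 128-d features, type 1 has none.
embed_size = 16
per_type_proj = nn.ParameterDict({'0': nn.Parameter(th.randn(128, embed_size))})
node_embeds = nn.Embedding(1000, embed_size)
```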
......@@ -61,6 +61,8 @@ class RelGraphConv(gluon.Block):
Default: False.
dropout : float, optional
Dropout rate. Default: 0.0
layer_norm : bool, optional
If True, apply layer normalization. Default: False
"""
def __init__(self,
in_feat,
......@@ -72,7 +74,8 @@ class RelGraphConv(gluon.Block):
activation=None,
self_loop=False,
low_mem=False,
dropout=0.0):
dropout=0.0,
layer_norm=False):
super(RelGraphConv, self).__init__()
self.in_feat = in_feat
self.out_feat = out_feat
......@@ -86,6 +89,7 @@ class RelGraphConv(gluon.Block):
self.self_loop = self_loop
assert low_mem is False, 'MXNet currently does not support low-memory implementation.'
assert layer_norm is False, 'MXNet currently does not support layer norm.'
if regularizer == "basis":
# add basis weights
......
......@@ -59,6 +59,8 @@ class RelGraphConv(nn.Module):
Turn it on when you encounter an OOM problem during training or evaluation.
dropout : float, optional
Dropout rate. Default: 0.0
layer_norm : bool, optional
If True, apply layer normalization. Default: False
"""
def __init__(self,
in_feat,
......@@ -70,7 +72,8 @@ class RelGraphConv(nn.Module):
activation=None,
self_loop=False,
low_mem=False,
dropout=0.0):
dropout=0.0,
layer_norm=False):
super(RelGraphConv, self).__init__()
self.in_feat = in_feat
self.out_feat = out_feat
......@@ -83,6 +86,7 @@ class RelGraphConv(nn.Module):
self.activation = activation
self.self_loop = self_loop
self.low_mem = low_mem
self.layer_norm = layer_norm
if regularizer == "basis":
# add basis weights
......@@ -120,6 +124,10 @@ class RelGraphConv(nn.Module):
self.h_bias = nn.Parameter(th.Tensor(out_feat))
nn.init.zeros_(self.h_bias)
# layer norm
if self.layer_norm:
self.layer_norm_weight = nn.LayerNorm(out_feat, elementwise_affine=True)
# weight for self loop
if self.self_loop:
self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat))
......@@ -219,6 +227,8 @@ class RelGraphConv(nn.Module):
g.update_all(self.message_func, fn.sum(msg='msg', out='h'))
# apply bias and activation
node_repr = g.dstdata['h']
if self.layer_norm:
node_repr = self.layer_norm_weight(node_repr)
if self.bias:
node_repr = node_repr + self.h_bias
if self.self_loop:
......
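The PyTorch hunks above add an optional layer normalization step that is applied to the aggregated messages before the bias, self-loop, and activation terms. A minimal sketch of that ordering, assuming the normalized dimension is `out_feat` (the layer's output size), looks like this:
```
import torch as th
import torch.nn as nn

out_feat = 64
layer_norm_weight = nn.LayerNorm(out_feat, elementwise_affine=True)
h_bias = nn.Parameter(th.zeros(out_feat))

def finalize(node_repr, self_loop_term=None, activation=None, layer_norm=True):
    # Ordering used in the diff: layer norm -> bias -> self-loop -> activation.
    if layer_norm:
        node_repr = layer_norm_weight(node_repr)
    node_repr = node_repr + h_bias
    if self_loop_term is not None:
        node_repr = node_repr + self_loop_term
    if activation is not None:
        node_repr = activation(node_repr)
    return node_repr
```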
......@@ -59,6 +59,8 @@ class RelGraphConv(layers.Layer):
Turn it on when you encounter an OOM problem during training or evaluation.
dropout : float, optional
Dropout rate. Default: 0.0
layer_norm : bool, optional
If True, apply layer normalization. Default: False
"""
def __init__(self,
......@@ -71,7 +73,8 @@ class RelGraphConv(layers.Layer):
activation=None,
self_loop=False,
low_mem=False,
dropout=0.0):
dropout=0.0,
layer_norm=False):
super(RelGraphConv, self).__init__()
self.in_feat = in_feat
self.out_feat = out_feat
......@@ -85,6 +88,8 @@ class RelGraphConv(layers.Layer):
self.self_loop = self_loop
self.low_mem = low_mem
assert layer_norm is False, 'TensorFlow currently does not support layer norm.'
xinit = tf.keras.initializers.glorot_uniform()
zeroinit = tf.keras.initializers.zeros()
......