Unverified Commit 4ef01dbb authored by xiang song(charlie.song), committed by GitHub

[Example] Rgcn support ogbn-mag dataset. (#1812)



* rgcn support ogbn-mag dataset

* upd

* multi-gpu val and test

* Fix

* fix

* Add support for ogbn-mag

* Fix

* Fix

* Fix

* Fix

* Add layer_norm

* update

* Fix merge

* Clean some code

* update Readme

* upd
Co-authored-by: Ubuntu <ubuntu@ip-172-31-68-185.ec2.internal>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-87-240.ec2.internal>
Co-authored-by: Ubuntu <ubuntu@ip-172-31-51-214.ec2.internal>
parent e7515773
...@@ -40,7 +40,7 @@ python3 entity_classify.py -d am --n-bases=40 --n-hidden=10 --l2norm=5e-4 --test
### Entity Classification with minibatch
AIFB: accuracy avg(5 runs) 90.56%, best 94.44% (DGL)
```
python3 entity_classify_mp.py -d aifb --testing --gpu 0 --fanout='20,20' --batch-size 128
```
MUTAG: accuracy avg(5 runs) 66.77%, best 69.12% (DGL)
...@@ -49,16 +49,30 @@ python3 entity_classify_mp.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gp
```
BGS: accuracy avg(5 runs) 91.72%, best 96.55% (DGL)
```
python3 entity_classify_mp.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout '40,40' --n-epochs=40 --batch-size=128
```
AM: accuracy avg(5 runs) 88.28%, best 90.40% (DGL)
```
python3 entity_classify_mp.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout '35,35' --batch-size 256 --lr 1e-2 --n-hidden 16 --use-self-loop --n-epochs=40
```
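Note that `--fanout` now takes one sampling fanout per RGCN layer as a comma-separated string instead of a single integer. A minimal sketch of how such a string could map to per-layer fanouts (the `parse_fanout` helper is hypothetical, not a function from entity_classify_mp.py):

```
# Hypothetical helper: '20,20' -> [20, 20], i.e. sample up to 20 neighbors
# per node at each of the two RGCN layers.
def parse_fanout(fanout_str):
    return [int(f) for f in fanout_str.split(',')]

print(parse_fanout('20,20'))  # [20, 20]
```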
### Entity Classification on OGBN-MAG
Test-bd: P3-8xlarge
OGBN-MAG accuracy 46.22
```
python3 entity_classify_mp.py -d ogbn-mag --testing --fanout='25,30' --batch-size 512 --n-hidden 64 --lr 0.01 --num-worker 0 --eval-batch-size 8 --low-mem --gpu 0,1,2,3,4,5,6,7 --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --mix-cpu-gpu --node-feats --layer-norm
```
OGBN-MAG without node-feats 43.24
```
python3 entity_classify_mp.py -d ogbn-mag --testing --fanout='25,25' --batch-size 256 --n-hidden 64 --lr 0.01 --num-worker 0 --eval-batch-size 8 --low-mem --gpu 0,1,2,3,4,5,6,7 --dropout 0.5 --use-self-loop --n-bases 2 --n-epochs 3 --mix-cpu-gpu --layer-norm
```
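The OGBN-MAG runs above train on eight GPUs by passing a comma-separated device list to `--gpu`. A minimal sketch of one common pattern for turning such a list into one worker process per device (the `run` worker below is hypothetical and omits the actual model, data loading, and gradient synchronization):

```
import torch.multiprocessing as mp

def run(proc_id, devices):
    # Hypothetical worker: in a real setup each process would build its model
    # on devices[proc_id] and synchronize gradients across processes.
    print('worker %d uses GPU %d' % (proc_id, devices[proc_id]))

if __name__ == '__main__':
    devices = [int(d) for d in '0,1,2,3,4,5,6,7'.split(',')]  # value of --gpu
    mp.spawn(run, args=(devices,), nprocs=len(devices))
```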
Test-bd: P2-8xlarge
### Link Prediction
FB15k-237: MRR 0.151 (DGL), 0.158 (paper)
```
...
...@@ -61,7 +61,7 @@ class RelGraphEmbedLayer(nn.Module):
    num_of_ntype : int
        Number of node types
    input_size : list of int
        A list of input feature sizes for each node type. If None, we
        treat the corresponding input feature as a one-hot encoding feature.
    embed_size : int
        Output embed size
...@@ -91,16 +91,15 @@ class RelGraphEmbedLayer(nn.Module):
        for ntype in range(num_of_ntype):
            if input_size[ntype] is not None:
                input_emb_size = input_size[ntype].shape[1]
                embed = nn.Parameter(th.Tensor(input_emb_size, self.embed_size))
                nn.init.xavier_uniform_(embed)
                self.embeds[str(ntype)] = embed

        self.node_embeds = th.nn.Embedding(node_tids.shape[0], self.embed_size, sparse=self.sparse_emb)
        nn.init.uniform_(self.node_embeds.weight, -1.0, 1.0)
    def forward(self, node_ids, node_tids, type_ids, features):
        """Forward computation
        Parameters
        ----------
...@@ -111,19 +110,21 @@ class RelGraphEmbedLayer(nn.Module):
        features : list of features
            List of initial features for nodes belonging to different node types.
            If None, the corresponding feature is a one-hot encoding feature;
            otherwise the features are used directly as input features and
            multiplied by a projection matrix.
        Returns
        -------
        tensor
            Embeddings as the input of the next layer
        """
        tsd_ids = node_ids.to(self.node_embeds.weight.device)
        embeds = th.empty(node_ids.shape[0], self.embed_size, device=self.dev_id)

        for ntype in range(self.num_of_ntype):
            if features[ntype] is not None:
                loc = node_tids == ntype
                embeds[loc] = features[ntype][type_ids[loc]].to(self.dev_id) @ self.embeds[str(ntype)].to(self.dev_id)
            else:
                loc = node_tids == ntype
                embeds[loc] = self.node_embeds(tsd_ids[loc]).to(self.dev_id)

        return embeds
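The updated forward pass handles two kinds of node types: types that come with input features are projected into the embedding space by a learned per-type matrix, while featureless types fall back to a trainable per-node embedding table. A minimal standalone sketch of that idea, assuming a single featureful/featureless split (the `MixedEmbed` class and its names are illustrative, not the example's API, and the device movement used for mix-cpu-gpu training is omitted):

```
import torch as th
import torch.nn as nn

class MixedEmbed(nn.Module):
    """Sketch: project raw features where they exist, otherwise look up
    a trainable per-node embedding (RelGraphEmbedLayer does this per node type)."""
    def __init__(self, num_nodes, feat_size, embed_size):
        super().__init__()
        self.proj = nn.Parameter(th.Tensor(feat_size, embed_size))
        nn.init.xavier_uniform_(self.proj)
        self.table = nn.Embedding(num_nodes, embed_size)

    def forward(self, node_ids, has_feat, feats):
        # feats: [N, feat_size]; rows for featureless nodes are ignored
        out = th.empty(node_ids.shape[0], self.table.embedding_dim)
        out[has_feat] = feats[has_feat] @ self.proj        # featureful nodes: projection
        out[~has_feat] = self.table(node_ids[~has_feat])   # featureless nodes: lookup
        return out
```

In the real layer this split happens per node type: the projection matrices live in a dict keyed by type id (`self.embeds`), while all featureless nodes share one large `nn.Embedding` table (`self.node_embeds`).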
...@@ -61,6 +61,8 @@ class RelGraphConv(gluon.Block):
        Default: False.
    dropout : float, optional
        Dropout rate. Default: 0.0
    layer_norm : bool, optional
        Add layer norm. Default: False
    """
    def __init__(self,
                 in_feat,
...@@ -72,7 +74,8 @@ class RelGraphConv(gluon.Block):
                 activation=None,
                 self_loop=False,
                 low_mem=False,
                 dropout=0.0,
                 layer_norm=False):
        super(RelGraphConv, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
...@@ -86,6 +89,7 @@ class RelGraphConv(gluon.Block):
        self.self_loop = self_loop
        assert low_mem is False, 'MXNet currently does not support low-memory implementation.'
        assert layer_norm is False, 'MXNet currently does not support layer norm.'
        if regularizer == "basis":
            # add basis weights
...
...@@ -59,6 +59,8 @@ class RelGraphConv(nn.Module):
        Turn it on when you encounter an OOM problem during training or evaluation.
    dropout : float, optional
        Dropout rate. Default: 0.0
    layer_norm : bool, optional
        Add layer norm. Default: False
    """
    def __init__(self,
                 in_feat,
...@@ -70,7 +72,8 @@ class RelGraphConv(nn.Module):
                 activation=None,
                 self_loop=False,
                 low_mem=False,
                 dropout=0.0,
                 layer_norm=False):
        super(RelGraphConv, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
...@@ -83,6 +86,7 @@ class RelGraphConv(nn.Module):
        self.activation = activation
        self.self_loop = self_loop
        self.low_mem = low_mem
        self.layer_norm = layer_norm
        if regularizer == "basis":
            # add basis weights
...@@ -120,6 +124,10 @@ class RelGraphConv(nn.Module):
            self.h_bias = nn.Parameter(th.Tensor(out_feat))
            nn.init.zeros_(self.h_bias)

        # layer norm
        if self.layer_norm:
            self.layer_norm_weight = nn.LayerNorm(out_feat, elementwise_affine=True)

        # weight for self loop
        if self.self_loop:
            self.loop_weight = nn.Parameter(th.Tensor(in_feat, out_feat))
...@@ -219,6 +227,8 @@ class RelGraphConv(nn.Module):
        g.update_all(self.message_func, fn.sum(msg='msg', out='h'))
        # apply bias and activation
        node_repr = g.dstdata['h']
        if self.layer_norm:
            node_repr = self.layer_norm_weight(node_repr)
        if self.bias:
            node_repr = node_repr + self.h_bias
        if self.self_loop:
...
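With `--layer-norm`, the PyTorch backend normalizes the aggregated messages before the bias, self-loop term, activation, and dropout are applied. A minimal standalone sketch of that ordering under those assumptions (toy shapes, self-loop omitted; not the module's full forward):

```
import torch as th
import torch.nn as nn

out_feat = 64
layer_norm_weight = nn.LayerNorm(out_feat, elementwise_affine=True)
h_bias = nn.Parameter(th.zeros(out_feat))
dropout = nn.Dropout(0.5)

node_repr = th.randn(128, out_feat)       # aggregated messages, one row per destination node
node_repr = layer_norm_weight(node_repr)  # layer norm comes first (added in this commit)
node_repr = node_repr + h_bias            # then the bias
node_repr = th.relu(node_repr)            # then the activation
node_repr = dropout(node_repr)            # then dropout
```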
...@@ -59,6 +59,8 @@ class RelGraphConv(layers.Layer):
        Turn it on when you encounter an OOM problem during training or evaluation.
    dropout : float, optional
        Dropout rate. Default: 0.0
    layer_norm : bool, optional
        Add layer norm. Default: False
    """
    def __init__(self,
...@@ -71,7 +73,8 @@ class RelGraphConv(layers.Layer):
                 activation=None,
                 self_loop=False,
                 low_mem=False,
                 dropout=0.0,
                 layer_norm=False):
        super(RelGraphConv, self).__init__()
        self.in_feat = in_feat
        self.out_feat = out_feat
...@@ -85,6 +88,8 @@ class RelGraphConv(layers.Layer):
        self.self_loop = self_loop
        self.low_mem = low_mem
        assert layer_norm is False, 'TensorFlow currently does not support layer norm.'

        xinit = tf.keras.initializers.glorot_uniform()
        zeroinit = tf.keras.initializers.zeros()
...