Unverified Commit aee10679 authored by xiang song(charlie.song), committed by GitHub

[Dataset] RDF dataset with DGL-Dataset template (#1869)



* update rdf builtin dataset

* Fix

* use new dataset

* fix

* rdf dataset using new framework

* tf work

* Fix mxnet

* Fix tensorflow

* Fix mxnet

* Update

* upd

* update some docstring

* clean some dead code
Co-authored-by: Ubuntu <ubuntu@ip-172-31-51-214.ec2.internal>
parent 15411d93
......@@ -5,7 +5,7 @@
* Author's code for link prediction: [https://github.com/MichSchli/RelationPrediction](https://github.com/MichSchli/RelationPrediction)
### Dependencies
Two extra python packages are needed for this example:
- MXNet nightly build
- requests
......@@ -20,17 +20,17 @@ pip install requests rdflib pandas
Example code was tested with rdflib 4.2.2 and pandas 0.23.4
### Entity Classification
AIFB: accuracy 97.22% (DGL), 95.83% (paper)
AIFB: accuracy 97.22% (5 runs, DGL), 95.83% (paper)
```
DGLBACKEND=mxnet python3 entity_classify.py -d aifb --testing --gpu 0
```
MUTAG: accuracy 73.53% (DGL), 73.23% (paper)
MUTAG: accuracy 70.59% (5 runs, DGL), 73.23% (paper)
```
DGLBACKEND=mxnet python3 entity_classify.py -d mutag --l2norm 5e-4 --n-bases 40 --testing --gpu 0
```
BGS: accuracy 75.86% (DGL, n-bases=20, OOM when >20), 83.10% (paper)
BGS: accuracy 86.21% (5 runs, DGL, n-bases=20), 83.10% (paper)
```
DGLBACKEND=mxnet python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 20 --testing --gpu 0 --relabel
DGLBACKEND=mxnet python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 20 --testing --gpu 0
```
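These commands rely on the new `dgl.data.rdf` loaders added in this change. A minimal usage sketch, restricted to names that appear in the diffs below:

```python
from dgl.data.rdf import AIFBDataset

dataset = AIFBDataset()                 # also: MUTAGDataset, BGSDataset, AMDataset
hg = dataset[0]                         # a heterogeneous graph
category = dataset.predict_category     # node type to classify
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data['train_mask']
labels = hg.nodes[category].data['labels']
```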
......@@ -14,10 +14,11 @@ import time
import mxnet as mx
from mxnet import gluon
import mxnet.ndarray as F
from dgl import DGLGraph
import dgl
from dgl.nn.mxnet import RelGraphConv
from dgl.contrib.data import load_data
from functools import partial
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from model import BaseRGCN
......@@ -39,13 +40,29 @@ class EntityClassify(BaseRGCN):
def main(args):
# load graph data
data = load_data(args.dataset, bfs_level=args.bfs_level, relabel=args.relabel)
num_nodes = data.num_nodes
num_rels = data.num_rels
num_classes = data.num_classes
labels = data.labels
train_idx = data.train_idx
test_idx = data.test_idx
if args.dataset == 'aifb':
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AMDataset()
else:
raise ValueError()
# Load from hetero-graph
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
train_idx = mx.nd.array(np.nonzero(train_mask.asnumpy())[0], dtype='int64')
test_idx = mx.nd.array(np.nonzero(test_mask.asnumpy())[0], dtype='int64')
labels = mx.nd.array(hg.nodes[category].data.pop('labels'), dtype='int64')
# split dataset into train, validate, test
if args.validation:
......@@ -54,13 +71,35 @@ def main(args):
else:
val_idx = train_idx
train_idx = mx.nd.array(train_idx)
# calculate norm for each edge type and store in edge
for canonical_etype in hg.canonical_etypes:
u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
v = v.asnumpy()
_, inverse_index, count = np.unique(v, return_inverse=True, return_counts=True)
degrees = count[inverse_index]
norm = np.ones(eid.shape[0]) / degrees
hg.edges[canonical_etype].data['norm'] = mx.nd.expand_dims(mx.nd.array(norm), axis=1)
# get target category id
category_id = len(hg.ntypes)
for i, ntype in enumerate(hg.ntypes):
if ntype == category:
category_id = i
g = dgl.to_homo(hg)
num_nodes = g.number_of_nodes()
node_ids = mx.nd.arange(num_nodes)
edge_norm = g.edata['norm']
edge_type = g.edata[dgl.ETYPE]
# find out the target node ids in g
node_tids = g.ndata[dgl.NTYPE]
loc = (node_tids == category_id)
loc = mx.nd.array(np.nonzero(loc.asnumpy())[0], dtype='int64')
target_idx = node_ids[loc]
# since the nodes are featureless, the input feature is then the node id.
feats = mx.nd.arange(num_nodes, dtype='int32')
# edge type and normalization factor
edge_type = mx.nd.array(data.edge_type, dtype='int32')
edge_norm = mx.nd.array(data.edge_norm).expand_dims(1)
labels = mx.nd.array(labels).reshape((-1))
# check cuda
use_cuda = args.gpu >= 0
......@@ -71,16 +110,12 @@ def main(args):
edge_norm = edge_norm.as_in_context(ctx)
labels = labels.as_in_context(ctx)
train_idx = train_idx.as_in_context(ctx)
g = g.to(ctx)
else:
ctx = mx.cpu(0)
# create graph
g = DGLGraph()
g.add_nodes(num_nodes)
g.add_edges(data.edge_src, data.edge_dst)
# create model
model = EntityClassify(len(g),
model = EntityClassify(num_nodes,
args.n_hidden,
num_classes,
num_rels,
......@@ -103,6 +138,7 @@ def main(args):
t0 = time.time()
with mx.autograd.record():
pred = model(g, feats, edge_type, edge_norm)
pred = pred[target_idx]
loss = loss_fcn(pred[train_idx], labels[train_idx])
t1 = time.time()
loss.backward()
......@@ -113,13 +149,15 @@ def main(args):
backward_time.append(t2 - t1)
print("Epoch {:05d} | Train Forward Time(s) {:.4f} | Backward Time(s) {:.4f}".
format(epoch, forward_time[-1], backward_time[-1]))
train_acc = F.sum(pred[train_idx].argmax(axis=1) == labels[train_idx]).asscalar() / train_idx.shape[0]
val_acc = F.sum(pred[val_idx].argmax(axis=1) == labels[val_idx]).asscalar() / len(val_idx)
train_acc = F.sum(mx.nd.cast(pred[train_idx].argmax(axis=1), 'int64') == labels[train_idx]).asscalar() / train_idx.shape[0]
val_acc = F.sum(mx.nd.cast(pred[val_idx].argmax(axis=1), 'int64') == labels[val_idx]).asscalar() / len(val_idx)
print("Train Accuracy: {:.4f} | Validation Accuracy: {:.4f}".format(train_acc, val_acc))
print()
logits = model.forward(g, feats, edge_type, edge_norm)
test_acc = F.sum(logits[test_idx].argmax(axis=1) == labels[test_idx]).asscalar() / len(test_idx)
logits = logits[target_idx]
test_acc = F.sum(mx.nd.cast(logits[test_idx].argmax(axis=1), 'int64') == labels[test_idx]).asscalar() / len(test_idx)
print("Test Accuracy: {:.4f}".format(test_acc))
print()
......@@ -147,8 +185,6 @@ if __name__ == '__main__':
help="dataset to use")
parser.add_argument("--l2norm", type=float, default=0,
help="l2 norm coef")
parser.add_argument("--relabel", default=False, action='store_true',
help="remove untouched nodes and relabel")
parser.add_argument("--use-self-loop", default=False, action='store_true',
help="include self feature as a special relation")
fp = parser.add_mutually_exclusive_group(required=False)
......
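The normalization loop above gives each edge the weight 1/(in-degree of its destination) within its edge type. A standalone NumPy sketch of the same computation (the array values are illustrative):

```python
import numpy as np

v = np.array([0, 1, 1, 2, 2, 2])   # destination node of each edge for one edge type
_, inverse_index, count = np.unique(v, return_inverse=True, return_counts=True)
degrees = count[inverse_index]      # in-degree of every edge's destination
norm = np.ones(len(v)) / degrees    # [1.0, 0.5, 0.5, 0.333, 0.333, 0.333]
```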
......@@ -36,46 +36,47 @@ Example code was tested with rdflib 4.2.2 and pandas 0.23.4
All experiments use one-hot encoding as featureless input. Best accuracy reported.
AIFB: accuracy 97.22% (DGL), 95.83% (paper)
AIFB: accuracy 96.11% (5 runs, DGL), 95.83% (paper)
```
python3 entity_classify.py -d aifb --testing --gpu 0
```
MUTAG: accuracy 73.53% (DGL), 73.23% (paper)
MUTAG: accuracy 72.06% (5 runs, DGL), 73.23% (paper)
```
python3 entity_classify.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0
```
BGS: accuracy 93.10% (DGL), 83.10% (paper)
BGS: accuracy 91.73% (5 runs, DGL), 83.10% (paper)
```
python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0
```
AM: accuracy 91.41% (DGL), 89.29% (paper)
AM: accuracy 88.28% (5 runs, DGL), 89.29% (paper)
```
python3 entity_classify.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0
```
### Entity Classification w/ minibatch training
Accuracy numbers are reported by 10 runs.
Accuracy numbers are reported over 5 runs (best and average).
AIFB: accuracy best=97.22% avg=93.33%
AIFB: accuracy best=97.22% avg=94.44%
```
python3 entity_classify_mb.py -d aifb --testing --gpu 0 --fanout=8
```
MUTAG: accuracy best=76.47% avg=68.38%
MUTAG: accuracy best=76.47% avg=67.37%
```
python3 entity_classify_mb.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0 --batch-size=50 --fanout=8
```
BGS: accuracy best=96.55% avg=92.41%
BGS: accuracy best=96.55% avg=91.04%
```
python3 entity_classify_mb.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0
```
AM: accuracy best=90.91% avg=88.43%
AM: accuracy best=89.39% avg=88.55%
```
python3 entity_classify_mb.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0
```
......
......@@ -9,28 +9,30 @@ import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl.data.rdf import AIFB, MUTAG, BGS, AM
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from model import EntityClassify
def main(args):
# load graph data
if args.dataset == 'aifb':
dataset = AIFB()
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAG()
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGS()
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AM()
dataset = AMDataset()
else:
raise ValueError()
g = dataset.graph
g = dataset[0]
category = dataset.predict_category
num_classes = dataset.num_classes
train_idx = dataset.train_idx
test_idx = dataset.test_idx
labels = dataset.labels
train_mask = g.nodes[category].data.pop('train_mask')
test_mask = g.nodes[category].data.pop('test_mask')
train_idx = th.nonzero(train_mask).squeeze()
test_idx = th.nonzero(test_mask).squeeze()
labels = g.nodes[category].data.pop('labels')
category_id = len(g.ntypes)
for i, ntype in enumerate(g.ntypes):
if ntype == category:
......
......@@ -13,7 +13,7 @@ from torch.utils.data import DataLoader
from functools import partial
import dgl
from dgl.data.rdf import AIFB, MUTAG, BGS, AM
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from model import EntityClassify, RelGraphEmbed
def extract_embed(node_embed, input_nodes):
......@@ -45,22 +45,24 @@ def evaluate(model, loader, node_embed, labels, category, device):
def main(args):
# load graph data
if args.dataset == 'aifb':
dataset = AIFB()
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAG()
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGS()
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AM()
dataset = AMDataset()
else:
raise ValueError()
g = dataset.graph
g = dataset[0]
category = dataset.predict_category
num_classes = dataset.num_classes
train_idx = dataset.train_idx
test_idx = dataset.test_idx
labels = dataset.labels
train_mask = g.nodes[category].data.pop('train_mask')
test_mask = g.nodes[category].data.pop('test_mask')
train_idx = th.nonzero(train_mask).squeeze()
test_idx = th.nonzero(test_mask).squeeze()
labels = g.nodes[category].data.pop('labels')
# split dataset into train, validate, test
if args.validation:
......
......@@ -11,21 +11,22 @@ from entity_classify import EntityClassify
def main(args):
# load graph data
if args.dataset == 'aifb':
dataset = AIFB()
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAG()
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGS()
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AM()
dataset = AMDataset()
else:
raise ValueError()
g = dataset.graph
g = dataset[0]
category = dataset.predict_category
num_classes = dataset.num_classes
test_idx = dataset.test_idx
labels = dataset.labels
test_mask = g.nodes[category].data.pop('test_mask')
test_idx = th.nonzero(test_mask).squeeze()
labels = g.nodes[category].data.pop('labels')
# check cuda
use_cuda = args.gpu >= 0 and th.cuda.is_available()
......@@ -42,7 +43,6 @@ def main(args):
num_bases=args.n_bases,
num_hidden_layers=args.n_layers - 2,
use_self_loop=args.use_self_loop)
# training loop
model.load_state_dict(th.load(args.model_path))
if use_cuda:
model.cuda()
......@@ -54,7 +54,7 @@ def main(args):
test_acc = th.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
print("Test Acc: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss.item()))
print()
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='RGCN')
parser.add_argument("--n-hidden", type=int, default=16,
......
......@@ -17,44 +17,44 @@ pip install requests torch rdflib pandas
Example code was tested with rdflib 4.2.2 and pandas 0.23.4
### Entity Classification
AIFB: accuracy 97.22% (DGL), 95.83% (paper)
AIFB: accuracy 92.59% (3 runs, DGL), 95.83% (paper)
```
python3 entity_classify.py -d aifb --testing --gpu 0
```
MUTAG: accuracy 75% (DGL), 73.23% (paper)
MUTAG: accuracy 72.55% (3 runs, DGL), 73.23% (paper)
```
python3 entity_classify.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0
```
BGS: accuracy 82.76% (DGL), 83.10% (paper)
BGS: accuracy 89.66% (3 runs, DGL), 83.10% (paper)
```
python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --relabel
python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0
```
AM: accuracy 87.37% (DGL), 89.29% (paper)
AM: accuracy 89.73% (3 runs, DGL), 89.29% (paper)
```
python3 entity_classify.py -d am --n-bases=40 --n-hidden=10 --l2norm=5e-4 --testing
```
### Entity Classification with minibatch
AIFB: accuracy avg(5 runs) 94.99%, best 97.22% (DGL)
AIFB: accuracy avg(5 runs) 90.56%, best 94.44% (DGL)
```
python3 entity_classify_mp.py -d aifb --testing --gpu 0 --fanout=20 --batch-size 128
```
MUTAG: accuracy avg(5 runs) 67.06%, best 80.88% (DGL)
MUTAG: accuracy avg(5 runs) 66.77%, best 69.12% (DGL)
```
python3 entity_classify_mp.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0 --batch-size 256 --use-self-loop --n-epochs 40 --dropout=0.3
python3 entity_classify_mp.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0 --batch-size 256 --use-self-loop --n-epochs 40
```
BGS: accuracy avg(5 runs) 84.14%, best 89.66% (DGL)
BGS: accuracy avg(5 runs) 91.72%, best 96.55% (DGL)
```
python3 entity_classify_mp.py -d bgs --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout 40 --n-epochs=40 --batch-size=128
```
AM: accuracy avg(5 runs) 88.28%, best 90.91% (DGL)
AM: accuracy avg(5 runs) 88.28%, best 90.40% (DGL)
```
python3 entity_classify_mp.py -d am --l2norm 5e-4 --n-bases 40 --testing --gpu 0 --fanout 35 --batch-size 256 --lr 1e-2 --n-hidden 16 --use-self-loop --n-epochs=40
```
......
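Per the scripts in this change, `--validation` carves the first fifth of the training indices out as a validation set; with `--testing`, the training indices double as the validation set. Condensed from the code below:

```python
if args.validation:
    val_idx = train_idx[:len(train_idx) // 5]    # first 20% for validation
    train_idx = train_idx[len(train_idx) // 5:]
else:                                            # --testing
    val_idx = train_idx
```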
......@@ -13,10 +13,10 @@ import numpy as np
import time
import torch
import torch.nn.functional as F
from dgl import DGLGraph
import dgl
from dgl.nn.pytorch import RelGraphConv
from dgl.contrib.data import load_data
from functools import partial
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from model import BaseRGCN
......@@ -44,13 +44,29 @@ class EntityClassify(BaseRGCN):
def main(args):
# load graph data
data = load_data(args.dataset, bfs_level=args.bfs_level, relabel=args.relabel)
num_nodes = data.num_nodes
num_rels = data.num_rels
num_classes = data.num_classes
labels = data.labels
train_idx = data.train_idx
test_idx = data.test_idx
if args.dataset == 'aifb':
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AMDataset()
else:
raise ValueError()
# Load from hetero-graph
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
train_idx = torch.nonzero(train_mask).squeeze()
test_idx = torch.nonzero(test_mask).squeeze()
labels = hg.nodes[category].data.pop('labels')
# split dataset into train, validate, test
if args.validation:
......@@ -59,14 +75,35 @@ def main(args):
else:
val_idx = train_idx
# calculate norm for each edge type and store in edge
for canonical_etype in hg.canonical_etypes:
u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
_, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
degrees = count[inverse_index]
norm = torch.ones(eid.shape[0]).float() / degrees.float()
norm = norm.unsqueeze(1)
hg.edges[canonical_etype].data['norm'] = norm
# get target category id
category_id = len(hg.ntypes)
for i, ntype in enumerate(hg.ntypes):
if ntype == category:
category_id = i
g = dgl.to_homo(hg)
num_nodes = g.number_of_nodes()
node_ids = torch.arange(num_nodes)
edge_norm = g.edata['norm']
edge_type = g.edata[dgl.ETYPE].long()
# find out the target node ids in g
node_tids = g.ndata[dgl.NTYPE]
loc = (node_tids == category_id)
target_idx = node_ids[loc]
# since the nodes are featureless, the input feature is then the node id.
feats = torch.arange(num_nodes)
# edge type and normalization factor
edge_type = torch.from_numpy(data.edge_type).long()
edge_norm = torch.from_numpy(data.edge_norm).unsqueeze(1).long()
labels = torch.from_numpy(labels).view(-1).long()
# check cuda
use_cuda = args.gpu >= 0 and torch.cuda.is_available()
if use_cuda:
......@@ -76,13 +113,8 @@ def main(args):
edge_norm = edge_norm.cuda()
labels = labels.cuda()
# create graph
g = DGLGraph()
g.add_nodes(num_nodes)
g.add_edges(data.edge_src, data.edge_dst)
# create model
model = EntityClassify(len(g),
model = EntityClassify(num_nodes,
args.n_hidden,
num_classes,
num_rels,
......@@ -108,6 +140,7 @@ def main(args):
optimizer.zero_grad()
t0 = time.time()
logits = model(g, feats, edge_type, edge_norm)
logits = logits[target_idx]
loss = F.cross_entropy(logits[train_idx], labels[train_idx])
t1 = time.time()
loss.backward()
......@@ -127,6 +160,7 @@ def main(args):
model.eval()
logits = model.forward(g, feats, edge_type, edge_norm)
logits = logits[target_idx]
test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
test_acc = torch.sum(logits[test_idx].argmax(dim=1) == labels[test_idx]).item() / len(test_idx)
print("Test Accuracy: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss.item()))
......@@ -156,8 +190,6 @@ if __name__ == '__main__':
help="dataset to use")
parser.add_argument("--l2norm", type=float, default=0,
help="l2 norm coef")
parser.add_argument("--relabel", default=False, action='store_true',
help="remove untouched nodes and relabel")
parser.add_argument("--use-self-loop", default=False, action='store_true',
help="include self feature as a special relation")
fp = parser.add_mutually_exclusive_group(required=False)
......@@ -167,5 +199,4 @@ if __name__ == '__main__':
args = parser.parse_args()
print(args)
args.bfs_level = args.n_layers + 1 # pruning used nodes for memory
main(args)
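The conversion pattern used above: `dgl.to_homo` concatenates the nodes of all types into one graph and records each node's and edge's original type in `g.ndata[dgl.NTYPE]` / `g.edata[dgl.ETYPE]`, so the model-output rows belonging to the target category can be recovered with a boolean mask. A condensed sketch, reusing names defined in the script above:

```python
import dgl
import torch

# hg, category_id, model, feats, edge_type, edge_norm as defined above
g = dgl.to_homo(hg)                        # flatten the heterograph; types kept in ndata/edata
node_tids = g.ndata[dgl.NTYPE]             # original node-type id of every node
target_idx = torch.arange(g.number_of_nodes())[node_tids == category_id]

logits = model(g, feats, edge_type, edge_norm)
logits = logits[target_idx]                # rows for the predict_category nodes only
```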
......@@ -21,7 +21,7 @@ import dgl
from dgl import DGLGraph
from functools import partial
from dgl.data.rdf import AIFB, MUTAG, BGS, AM
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from model import RelGraphEmbedLayer
from dgl.nn import RelGraphConv
from utils import thread_wrapped_func
......@@ -321,32 +321,34 @@ def run(proc_id, n_gpus, args, devices, dataset):
print("{}/{} Mean forward time: {:4f}".format(proc_id, n_gpus,
np.mean(forward_time[len(forward_time) // 4:])))
print("{}/{} Mean backward time: {:4f}".format(proc_id, n_gpus,
np.mean(backward_time[len(backward_time) // 4:])))
def main(args, devices):
# load graph data
ogb_dataset = False
if args.dataset == 'aifb':
dataset = AIFB()
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAG()
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGS()
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AM()
dataset = AMDataset()
else:
raise ValueError()
# Load from hetero-graph
hg = dataset.graph
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_idx = dataset.train_idx
test_idx = dataset.test_idx
labels = dataset.labels
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
labels = hg.nodes[category].data.pop('labels')
train_idx = th.nonzero(train_mask).squeeze()
test_idx = th.nonzero(test_mask).squeeze()
# split dataset into train, validate, test
if args.validation:
......@@ -356,14 +358,14 @@ def main(args, devices):
val_idx = train_idx
# calculate norm for each edge type and store in edge
for canonical_etypes in hg.canonical_etypes:
u, v, eid = hg.all_edges(form='all', etype=canonical_etypes)
for canonical_etype in hg.canonical_etypes:
u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
_, inverse_index, count = th.unique(v, return_inverse=True, return_counts=True)
degrees = count[inverse_index]
norm = th.ones(eid.shape[0]) / degrees
norm = norm.unsqueeze(1)
hg.edges[canonical_etypes].data['norm'] = norm
hg.edges[canonical_etype].data['norm'] = norm
# get target category id
category_id = len(hg.ntypes)
for i, ntype in enumerate(hg.ntypes):
......@@ -385,7 +387,7 @@ def main(args, devices):
n_gpus = len(devices)
# cpu
if devices[0] == -1:
run(0, 0, args, ['cpu'],
(g, num_of_ntype, num_classes, num_rels, target_idx,
train_idx, val_idx, test_idx, labels))
# gpu
......
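The CPU branch above calls `run` directly; the GPU branch is elided in this hunk. A sketch only, assuming `torch.multiprocessing` and the `thread_wrapped_func` helper imported at the top of the file, of the usual one-worker-per-device pattern:

```python
import torch.multiprocessing as mp

procs = []
for proc_id in range(n_gpus):
    p = mp.Process(target=thread_wrapped_func(run),
                   args=(proc_id, n_gpus, args, devices,
                         (g, num_of_ntype, num_classes, num_rels, target_idx,
                          train_idx, val_idx, test_idx, labels)))
    p.start()                # each worker trains on devices[proc_id]
    procs.append(p)
for p in procs:
    p.join()
```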
......@@ -11,23 +11,24 @@
* pandas
```
pip install requests torch rdflib pandas
pip install requests tensorflow rdflib pandas
export DGLBACKEND=tensorflow
```
Example code was tested with rdflib 4.2.2 and pandas 0.23.4
### Entity Classification
AIFB: accuracy 97.22% (DGL), 95.83% (paper)
AIFB: accuracy 92.78% (5 runs, DGL), 95.83% (paper)
```
python3 entity_classify.py -d aifb --testing --gpu 0
```
MUTAG: accuracy 75% (DGL), 73.23% (paper)
MUTAG: accuracy 71.47% (5 runs, DGL), 73.23% (paper)
```
python3 entity_classify.py -d mutag --l2norm 5e-4 --n-bases 30 --testing --gpu 0
```
BGS: accuracy 79.3% (DGL, n-bases=25), 83.10% (paper, n-bases=40)
BGS: accuracy 93.10% (5 runs, DGL, n-bases=25), 83.10% (paper, n-bases=40)
```
python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 25 --testing --gpu 0 --relabel
python3 entity_classify.py -d bgs --l2norm 5e-4 --n-bases 25 --testing --gpu 0
```
......@@ -13,10 +13,10 @@ import numpy as np
import time
import tensorflow as tf
from tensorflow.keras import layers
from dgl import DGLGraph
import dgl
from dgl.nn.tensorflow import RelGraphConv
from dgl.contrib.data import load_data
from functools import partial
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from model import BaseRGCN
......@@ -49,28 +49,56 @@ def acc(logits, labels, mask):
def main(args):
# load graph data
data = load_data(args.dataset, bfs_level=args.bfs_level, relabel=args.relabel)
num_nodes = data.num_nodes
num_rels = data.num_rels
num_classes = data.num_classes
labels = data.labels
train_idx = data.train_idx
test_idx = data.test_idx
# split dataset into train, validate, test
if args.validation:
val_idx = train_idx[:len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5:]
if args.dataset == 'aifb':
dataset = AIFBDataset()
elif args.dataset == 'mutag':
dataset = MUTAGDataset()
elif args.dataset == 'bgs':
dataset = BGSDataset()
elif args.dataset == 'am':
dataset = AMDataset()
else:
val_idx = train_idx
# since the nodes are featureless, the input feature is then the node id.
feats = tf.range(num_nodes, dtype=tf.int64)
# edge type and normalization factor
edge_type = tf.convert_to_tensor(data.edge_type)
edge_norm = tf.expand_dims(tf.convert_to_tensor(data.edge_norm), 1)
labels = tf.reshape(tf.convert_to_tensor(labels), (-1, ))
raise ValueError()
# preprocessing in cpu
with tf.device("/cpu:0"):
# Load from hetero-graph
hg = dataset[0]
num_rels = len(hg.canonical_etypes)
num_of_ntype = len(hg.ntypes)
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = hg.nodes[category].data.pop('train_mask')
test_mask = hg.nodes[category].data.pop('test_mask')
train_idx = tf.squeeze(tf.where(train_mask))
test_idx = tf.squeeze(tf.where(test_mask))
labels = hg.nodes[category].data.pop('labels')
# split dataset into train, validate, test
if args.validation:
val_idx = train_idx[:len(train_idx) // 5]
train_idx = train_idx[len(train_idx) // 5:]
else:
val_idx = train_idx
# calculate norm for each edge type and store in edge
for canonical_etype in hg.canonical_etypes:
u, v, eid = hg.all_edges(form='all', etype=canonical_etype)
_, inverse_index, count = tf.unique_with_counts(v)
degrees = tf.gather(count, inverse_index)
norm = tf.ones(eid.shape[0]) / tf.cast(degrees, tf.float32)
norm = tf.expand_dims(norm, 1)
hg.edges[canonical_etype].data['norm'] = norm
# get target category id
category_id = len(hg.ntypes)
for i, ntype in enumerate(hg.ntypes):
if ntype == category:
category_id = i
# edge type and normalization factor
g = dgl.to_homo(hg)
# check cuda
if args.gpu < 0:
......@@ -78,25 +106,32 @@ def main(args):
use_cuda = False
else:
device = "/gpu:{}".format(args.gpu)
g = g.to(device)
use_cuda = True
with tf.device(device):
num_nodes = g.number_of_nodes()
node_ids = tf.range(num_nodes, dtype=tf.int64)
edge_norm = g.edata['norm']
edge_type = tf.cast(g.edata[dgl.ETYPE], tf.int64)
# create graph
g = DGLGraph()
g.add_nodes(num_nodes)
g.add_edges(data.edge_src, data.edge_dst)
# find out the target node ids in g
node_tids = g.ndata[dgl.NTYPE]
loc = (node_tids == category_id)
target_idx = tf.squeeze(tf.where(loc))
# since the nodes are featureless, the input feature is then the node id.
feats = tf.range(num_nodes, dtype=tf.int64)
with tf.device(device):
# create model
model = EntityClassify(len(g),
args.n_hidden,
num_classes,
num_rels,
num_bases=args.n_bases,
num_hidden_layers=args.n_layers - 2,
dropout=args.dropout,
use_self_loop=args.use_self_loop,
use_cuda=use_cuda)
model = EntityClassify(num_nodes,
args.n_hidden,
num_classes,
num_rels,
num_bases=args.n_bases,
num_hidden_layers=args.n_layers - 2,
dropout=args.dropout,
use_self_loop=args.use_self_loop,
use_cuda=use_cuda)
# optimizer
optimizer = tf.keras.optimizers.Adam(
......@@ -111,9 +146,10 @@ def main(args):
t0 = time.time()
with tf.GradientTape() as tape:
logits = model(g, feats, edge_type, edge_norm)
logits = tf.gather(logits, target_idx)
loss = loss_fcn(tf.gather(labels, train_idx), tf.gather(logits, train_idx))
# Manual weight decay
# TensorFlow's Adam(W) optimizer implements weight decay differently
# from PyTorch's, which leads to worse results here. Manually adding
# the weights' L2 penalty to the loss solves this problem.
for weight in model.trainable_weights:
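The body of this loop falls outside the hunk. A plausible reconstruction of the manual L2 penalty described by the comment (a sketch only; the exact expression is an assumption, reusing the script's `--l2norm` coefficient):

```python
for weight in model.trainable_weights:
    # assumed: add an L2 penalty per trainable weight instead of relying on AdamW
    loss = loss + args.l2norm * tf.nn.l2_loss(weight)
```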
......@@ -136,6 +172,7 @@ def main(args):
print()
logits = model(g, feats, edge_type, edge_norm)
logits = tf.gather(logits, target_idx)
test_loss = loss_fcn(tf.gather(labels, test_idx), tf.gather(logits, test_idx))
test_acc = acc(logits, labels, test_idx)
print("Test Accuracy: {:.4f} | Test loss: {:.4f}".format(test_acc, test_loss.numpy().item()))
......@@ -165,8 +202,6 @@ if __name__ == '__main__':
help="dataset to use")
parser.add_argument("--l2norm", type=float, default=0,
help="l2 norm coef")
parser.add_argument("--relabel", default=False, action='store_true',
help="remove untouched nodes and relabel")
parser.add_argument("--use-self-loop", default=False, action='store_true',
help="include self feature as a special relation")
fp = parser.add_mutually_exclusive_group(required=False)
......
......@@ -4,6 +4,7 @@
from __future__ import absolute_import
import os, sys
import abc
from .utils import download, extract_archive, get_download_dir, makedirs
from ..utils import retry_method_with_fix
......@@ -37,7 +38,7 @@ class DGLDataset(object):
Default: ~/.dgl/
save_dir : str
Directory to save the processed dataset.
Default: ~/.dgl/
Default: same as raw_dir
force_reload : bool
Whether to reload the dataset. Default: False
verbose : bool
......@@ -190,14 +191,16 @@ class DGLDataset(object):
"""
return self._verbose
@abc.abstractmethod
def __getitem__(self, idx):
r"""Gets the data object at index.
"""
raise NotImplementedError
pass
@abc.abstractmethod
def __len__(self):
r"""The number of examples in the dataset."""
raise NotImplementedError
pass
class DGLBuiltinDataset(DGLDataset):
r"""The Basic DGL Builtin Dataset.
......
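For reference, the template above leaves `__getitem__` and `__len__` abstract. A minimal hypothetical subclass (the constructor keywords follow the docstring fragments above; the `process` hook is an assumption and not shown in this diff):

```python
from dgl.data import DGLDataset  # assumed public import path

class ToyDataset(DGLDataset):
    """Hypothetical dataset used only to illustrate the template."""

    def __init__(self, raw_dir=None, force_reload=False, verbose=False):
        self._graphs = []
        super(ToyDataset, self).__init__(name='toy', raw_dir=raw_dir,
                                         force_reload=force_reload,
                                         verbose=verbose)

    def process(self):
        # assumed hook: populate self._graphs from files under the raw directory
        pass

    def __getitem__(self, idx):
        return self._graphs[idx]   # the data object at the given index

    def __len__(self):
        return len(self._graphs)   # number of examples
```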
......@@ -5,13 +5,16 @@ import os
import sys
import hashlib
import warnings
import numpy as np
import warnings
import requests
import pickle
import errno
import numpy as np
from .graph_serialize import save_graphs, load_graphs, load_labels
from .tensor_serialize import save_tensors, load_tensors
from .. import backend as F
__all__ = ['loadtxt','download', 'check_sha1', 'extract_archive',
'get_download_dir', 'Subset', 'split_dataset',
'save_graphs', "load_graphs", "load_labels", "save_tensors", "load_tensors"]
......@@ -237,6 +240,13 @@ def get_download_dir():
os.makedirs(dirname)
return dirname
def makedirs(path):
    try:
        os.makedirs(os.path.expanduser(os.path.normpath(path)))
    except OSError as e:
        # ignore the error only if the directory already exists
        if e.errno != errno.EEXIST or not os.path.isdir(path):
            raise e
def save_info(path, info):
""" Save dataset related information into disk.
......@@ -268,6 +278,39 @@ def load_info(path):
info = pickle.load(pf)
return info
def deprecate_property(old, new):
warnings.warn('Property {} will be deprecated, please use {} instead.'.format(old, new))
def deprecate_function(old, new):
warnings.warn('Function {} will be deprecated, please use {} instead.'.format(old, new))
def deprecate_class(old, new):
warnings.warn('Class {} will be deprecated, please use {} instead.'.format(old, new))
def idx2mask(idx, len):
    """Create a 0/1 mask array of the given length with ones at the positions in idx."""
    mask = np.zeros(len)
    mask[idx] = 1
    return mask
def generate_mask_tensor(mask):
    """Generate a mask tensor for the backend in use.

    For PyTorch and TensorFlow this creates a bool tensor;
    for MXNet it creates a float32 tensor.

    Parameters
    ----------
    mask: numpy ndarray
        input mask array
    """
    assert isinstance(mask, np.ndarray), "input for generate_mask_tensor " \
        "should be a numpy ndarray"
    if F.backend_name == 'mxnet':
        return F.tensor(mask, dtype=F.data_type_dict['float32'])
    else:
        return F.tensor(mask, dtype=F.data_type_dict['bool'])
class Subset(object):
"""Subset of a dataset at specified indices
......
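A usage sketch for the two mask helpers added above (import path assumed; values illustrative):

```python
import numpy as np
from dgl.data.utils import idx2mask, generate_mask_tensor  # assumed import path

train_idx = np.array([0, 2, 5])
mask = idx2mask(train_idx, 8)             # array([1., 0., 1., 0., 0., 1., 0., 0.])
train_mask = generate_mask_tensor(mask)   # bool tensor; float32 under MXNet
```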