Unverified commit e9b624fe, authored by Minjie Wang, committed by GitHub

Merge branch 'master' into dist_part

parents 8086d1ed a88e7f7e
---
name: "\U0001F528Work Item (DEV ONLY)"
about: Work item issue for tracking progress. Dev team only.
title: ''
labels: Work Item
assignees: ''
---
## 🔨Work Item
**IMPORTANT:**
* This template is only for the dev team to track project progress. For feature requests or bug reports, please use the corresponding issue templates.
* DO NOT create a new work item if the purpose is to fix an existing issue or feature request. We will directly use the issue in the project tracker.
Project tracker: https://github.com/orgs/dmlc/projects/2
## Description
<!-- short description of the work item -->
## Prerequisite work items or issues
<!-- what must be done before this -->
---
name: "\U0001F41B Bug Report"
about: Submit a bug report to help us improve DGL
title: ''
labels: ''
assignees: ''

---

## 🐛 Bug

<!-- A clear and concise description of what the bug is. -->

## To Reproduce

Steps to reproduce the behavior:

1.
1.
1.

<!-- If you have a code sample, error messages, stack traces, please provide it here as well -->

## Expected behavior

<!-- A clear and concise description of what you expected to happen. -->

## Environment

 - DGL Version (e.g., 1.0):
 - Backend Library & Version (e.g., PyTorch 0.4.1, MXNet/Gluon 1.3):
 - OS (e.g., Linux):
 - How you installed DGL (`conda`, `pip`, source):
 - Build command you used (if compiling from source):
 - Python version:
 - CUDA/cuDNN version (if applicable):
 - GPU models and configuration (e.g. V100):
 - Any other relevant information:

## Additional context

<!-- Add any other context about the problem here. -->
---
name: "\U0001F4DA Documentation"
about: Report an issue related to docs.dgl.ai
title: ''
labels: ''
assignees: ''

---

## 📚 Documentation

<!-- Please specify whether it's the tutorial part or the API reference part -->
<!-- Describe the issue. -->
---
name: "\U0001F680Feature Request"
about: Submit a proposal/request for a new DGL feature
title: ''
labels: ''
assignees: ''

---

## 🚀 Feature

<!-- A brief description of the feature proposal -->

## Motivation

<!-- Please outline the motivation for the proposal. Is your feature request
related to a problem? e.g., I'm always frustrated when [...]. If this is
related to another GitHub issue, please link here too -->

## Alternatives

<!-- A clear and concise description of any alternative solutions or features you've considered, if any. -->

## Pitch

<!-- A clear and concise description of what you want to happen. -->

## Additional context

<!-- Add any other context or screenshots about the feature request here. -->
---
name: "❓Questions/Help/Support"
about: Do you need support? We have resources.
title: ''
labels: ''
assignees: ''

---

## ❓ Questions and Help

Before proceeding, please note that we recommend
using our discussion forum (https://discuss.dgl.ai) for
general questions. As a result, this issue will
likely be CLOSED shortly.
@@ -28,6 +28,9 @@ wheels/
 *.egg
 MANIFEST
 
+# Whitelist some distribution / package non-related directories
+!tests/dist
+
 # PyInstaller
 # Usually these files are written by a python script from a template
 # before PyInstaller builds the exe, so as to inject date/other infos into it.
@@ -12,8 +12,7 @@
 	url = https://github.com/KarypisLab/METIS.git
 [submodule "third_party/cub"]
 	path = third_party/cub
-	url = https://github.com/NVlabs/cub.git
-	branch = 1.8.0
+	url = https://github.com/NVIDIA/cub.git
 [submodule "third_party/phmap"]
 	path = third_party/phmap
 	url = https://github.com/greg7mdp/parallel-hashmap.git
@@ -46,18 +46,12 @@ endif(NOT MSVC)
 if(USE_CUDA)
   message(STATUS "Build with CUDA support")
   project(dgl C CXX)
+  # see https://github.com/NVIDIA/thrust/issues/1401
+  add_definitions(-DTHRUST_CUB_WRAPPED_NAMESPACE=dgl)
   include(cmake/modules/CUDA.cmake)
-  if ((CUDA_VERSION_MAJOR LESS 11) OR
-      ((CUDA_VERSION_MAJOR EQUAL 11) AND (CUDA_VERSION_MINOR EQUAL 0)))
-    # For cuda<11, use external CUB/Thrust library because CUB is not part of CUDA.
-    # For cuda==11.0, use external CUB/Thrust library because there is a bug in the
-    # official CUB library which causes invalid device ordinal error for DGL. The bug
-    # is fixed by https://github.com/NVIDIA/cub/commit/9143e47e048641aa0e6ddfd645bcd54ff1059939
-    # in 11.1.
-    message(STATUS "Detected CUDA of version ${CUDA_VERSION}. Use external CUB/Thrust library.")
-    cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/thrust")
-    cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cub")
-  endif()
+  message(STATUS "Use external CUB/Thrust library for a consistent API and performance.")
+  cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/thrust")
+  cuda_include_directories(BEFORE "${CMAKE_SOURCE_DIR}/third_party/cub")
 endif(USE_CUDA)
 
 # initial variables
@@ -304,6 +298,7 @@ if(BUILD_TORCH)
       ${CMAKE_COMMAND} -E env
       CMAKE_COMMAND=${CMAKE_CMD}
       CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
+      USE_CUDA=${USE_CUDA}
       BINDIR=${BINDIR}
       cmd /e:on /c ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS}
     DEPENDS ${BUILD_SCRIPT}
@@ -315,6 +310,7 @@ if(BUILD_TORCH)
       ${CMAKE_COMMAND} -E env
       CMAKE_COMMAND=${CMAKE_CMD}
       CUDA_TOOLKIT_ROOT_DIR=${CUDA_TOOLKIT_ROOT_DIR}
+      USE_CUDA=${USE_CUDA}
       BINDIR=${CMAKE_CURRENT_BINARY_DIR}
       bash ${BUILD_SCRIPT} ${TORCH_PYTHON_INTERPS}
     DEPENDS ${BUILD_SCRIPT}
@@ -63,3 +63,4 @@ Contributors
 * [Shaked Brody](https://github.com/shakedbr) from Technion
 * [Jiahui Liu](https://github.com/paoxiaode) from Nvidia
 * [Neil Dickson](https://github.com/ndickson-nvidia) from Nvidia
+* [Chang Liu](https://github.com/chang-l) from Nvidia
 #!/usr/bin/env groovy
 
-dgl_linux_libs = 'build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-36m-x86_64-linux-gnu.so, build/tensoradapter/pytorch/*.so'
+dgl_linux_libs = 'build/libdgl.so, build/runUnitTests, python/dgl/_ffi/_cy3/core.cpython-*-x86_64-linux-gnu.so, build/tensoradapter/pytorch/*.so'
 // Currently DGL on Windows is not working with Cython yet
 dgl_win64_libs = "build\\dgl.dll, build\\runUnitTests.exe, build\\tensoradapter\\pytorch\\*.dll"

@@ -62,6 +62,22 @@ def unit_test_linux(backend, dev) {
   }
 }
 
+def unit_distributed_linux(backend, dev) {
+  init_git()
+  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
+  timeout(time: 30, unit: 'MINUTES') {
+    sh "bash tests/scripts/task_distributed_test.sh ${backend} ${dev}"
+  }
+}
+
+def unit_test_cugraph(backend, dev) {
+  init_git()
+  unpack_lib("dgl-${dev}-linux", dgl_linux_libs)
+  timeout(time: 15, unit: 'MINUTES') {
+    sh "bash tests/scripts/cugraph_unit_test.sh ${backend}"
+  }
+}
+
 def unit_test_win64(backend, dev) {
   init_git_win64()
   unpack_lib("dgl-${dev}-win64", dgl_win64_libs)

@@ -239,6 +255,24 @@ pipeline {
         }
       }
     }
+    stage('PyTorch Cugraph GPU Build') {
+      agent {
+        docker {
+          label "linux-cpu-node"
+          image "nvcr.io/nvidia/pytorch:22.04-py3"
+          args "-u root"
+          alwaysPull false
+        }
+      }
+      steps {
+        build_dgl_linux('cugraph')
+      }
+      post {
+        always {
+          cleanWs disableDeferredWipeout: true, deleteDirs: true
+        }
+      }
+    }
     stage('CPU Build (Win64)') {
       // Windows build machines are manually added to Jenkins master with
       // "windows" label as permanent agents.

@@ -426,6 +460,51 @@ pipeline {
         }
       }
     }
+    stage('Distributed') {
+      agent {
+        docker {
+          label "linux-cpu-node"
+          image "dgllib/dgl-ci-cpu:cu101_v220629"
+          args "--shm-size=4gb"
+          alwaysPull true
+        }
+      }
+      stages {
+        stage('Distributed Torch CPU Unit test') {
+          steps {
+            unit_distributed_linux('pytorch', 'cpu')
+          }
+        }
+      }
+      post {
+        always {
+          cleanWs disableDeferredWipeout: true, deleteDirs: true
+        }
+      }
+    }
+    stage('PyTorch Cugraph GPU') {
+      agent {
+        docker {
+          label "linux-gpu-node"
+          image "nvcr.io/nvidia/pytorch:22.04-py3"
+          args "--runtime nvidia --shm-size=8gb"
+          alwaysPull false
+        }
+      }
+      stages {
+        stage('PyTorch Cugraph GPU Unit test') {
+          steps {
+            sh 'nvidia-smi'
+            unit_test_cugraph('pytorch', 'cugraph')
+          }
+        }
+      }
+      post {
+        always {
+          cleanWs disableDeferredWipeout: true, deleteDirs: true
+        }
+      }
+    }
     stage('MXNet CPU') {
       agent {
         docker {
@@ -23,7 +23,7 @@ DGL is an easy-to-use, high performance and scalable Python package for deep lea
 
 ### A GPU-ready graph library
 
-DGL provides a powerful graph object that can reside on either CPU or GPU. It bundles structural data as well as features for a better control. We provide a variety of functions for computing with graph objects including efficient and customizable message passing primitives for Graph Neural Networks.
+DGL provides a powerful graph object that can reside on either CPU or GPU. It bundles structural data as well as features for better control. We provide a variety of functions for computing with graph objects including efficient and customizable message passing primitives for Graph Neural Networks.
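
Aside: for readers new to DGL, here is a minimal sketch of those message-passing primitives; the toy graph and the 8-dimensional features are made up for illustration.

```python
import torch
import dgl
import dgl.function as fn

# Toy 4-node graph with edges 0->1, 1->2, 2->3 (illustrative only).
g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))
g.ndata['h'] = torch.randn(4, 8)  # random 8-dim node features

# One round of message passing: copy each source node's feature onto its
# out-edges, then sum the incoming messages at every destination node.
g.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h_sum'))
print(g.ndata['h_sum'].shape)  # torch.Size([4, 8])
```
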
 ### A versatile tool for GNN researchers and practitioners
 
@@ -32,7 +32,7 @@ DGL collects a rich set of [example implementations](https://github.com/dmlc/dgl
 
 ### Easy to learn and use
 
-DGL provides a plenty of learning materials for all kinds of users from ML researcher to domain experts. The [Blitz Introduction to DGL](https://docs.dgl.ai/tutorials/blitz/index.html) is a 120-minute tour of the basics of graph machine learning. The [User Guide](https://docs.dgl.ai/guide/index.html) explains in more details the concepts of graphs as well as the training methodology. All of them include code snippets in DGL that are runnable and ready to be plugged into one’s own pipeline.
+DGL provides plenty of learning materials for all kinds of users, from ML researchers to domain experts. The [Blitz Introduction to DGL](https://docs.dgl.ai/tutorials/blitz/index.html) is a 120-minute tour of the basics of graph machine learning. The [User Guide](https://docs.dgl.ai/guide/index.html) explains in more detail the concepts of graphs as well as the training methodology. All of them include code snippets in DGL that are runnable and ready to be plugged into one’s own pipeline.
 
 ### Scalable and efficient
@@ -85,7 +85,7 @@ Take the survey [here](https://forms.gle/Ej3jHCocACmb49Gp8) and leave any feedba
 * TGL: A graph learning framework for large-scale temporal graphs. https://github.com/amazon-research/tgl
 * gtrick: Bag of Tricks for Graph Neural Networks. https://github.com/sangyx/gtrick
 * ArangoDB-DGL Adapter: Import [ArangoDB](https://github.com/arangodb/arangodb) graphs into DGL and vice-versa. https://github.com/arangoml/dgl-adapter
+* DGLD: [DGLD](https://github.com/EagleLab-ZJU/DGLD) is an open-source library for Deep Graph Anomaly Detection based on pytorch and DGL.
 
 ### Awesome Papers Using DGL
 
 1. [**Benchmarking Graph Neural Networks**](https://arxiv.org/pdf/2003.00982.pdf), *Vijay Prakash Dwivedi, Chaitanya K. Joshi, Thomas Laurent, Yoshua Bengio, Xavier Bresson*
@@ -11,7 +11,7 @@ from .. import utils
 @utils.skip_if_gpu()
 @utils.benchmark('time')
 @utils.parametrize('graph_name', ['livejournal', 'reddit'])
-@utils.parametrize('format', ['coo', 'csc'])
+@utils.parametrize('format', ['coo'])
 @utils.parametrize('seed_egdes_num', [500, 5000, 50000])
 def track_time(graph_name, format, seed_egdes_num):
     device = utils.get_bench_device()
@@ -10,8 +10,9 @@ def _random_walk(g, seeds, length):
 def _node2vec(g, seeds, length):
     return dgl.sampling.node2vec_random_walk(g, seeds, 1, 1, length)
 
+@utils.skip_if_gpu()
 @utils.benchmark('time')
-@utils.parametrize_cpu('graph_name', ['cora', 'livejournal', 'friendster'])
+@utils.parametrize('graph_name', ['cora', 'livejournal', 'friendster'])
 @utils.parametrize('num_seeds', [10, 100, 1000])
 @utils.parametrize('length', [2, 5, 10, 20])
 @utils.parametrize('algorithm', ['_random_walk', '_node2vec'])
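
Aside: the `_node2vec` helper above wraps `dgl.sampling.node2vec_random_walk`, and `_random_walk` presumably wraps `dgl.sampling.random_walk`. A minimal, self-contained sketch of the two calls this benchmark times, on a toy random graph standing in for the cora/livejournal/friendster datasets:

```python
import torch
import dgl

g = dgl.rand_graph(100, 500)   # toy graph: 100 nodes, 500 random edges
seeds = torch.arange(10)       # start one walk per seed node

# Uniform random walk; traces holds the visited node IDs, shape (10, 5 + 1).
traces, _ = dgl.sampling.random_walk(g, seeds, length=5)

# node2vec-biased walk; with p = q = 1 it degenerates to a uniform walk.
n2v_traces = dgl.sampling.node2vec_random_walk(g, seeds, 1, 1, 5)
```
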
@@ -8,7 +8,7 @@ from .. import utils
 @utils.skip_if_gpu()
 @utils.benchmark('time', timeout=1200)
-@utils.parametrize('graph_name', ['reddit', "ogbn-product"])
+@utils.parametrize('graph_name', ['reddit', "ogbn-products"])
 @utils.parametrize('num_seed_nodes', [32, 256, 1024, 2048])
 @utils.parametrize('fanout', [5, 10, 20])
 def track_time(graph_name, num_seed_nodes, fanout):
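
Aside: given the `num_seed_nodes` and `fanout` parameters, this benchmark presumably times DGL's fanout-bounded neighbor sampling. A minimal sketch on a toy graph (the real runs use reddit/ogbn-products):

```python
import torch
import dgl

g = dgl.rand_graph(1000, 20000)                 # toy stand-in graph
seeds = torch.randint(0, g.num_nodes(), (32,))  # num_seed_nodes = 32

# Keep at most `fanout` in-edges per seed node, picked uniformly at random.
frontier = dgl.sampling.sample_neighbors(g, seeds, fanout=10)
print(frontier.num_edges())                     # <= 32 * 10 sampled edges
```
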
import numpy as np
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F

from .. import utils


class RGCN(nn.Module):
    def __init__(self,
                 num_nodes,
                 n_hidden,
                 num_classes,
                 num_rels,
                 num_bases,
                 num_hidden_layers,
                 dropout,
                 low_mem):
        super(RGCN, self).__init__()
        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
                                        num_bases, activation=F.relu, dropout=dropout,
                                        low_mem=low_mem))
        # h2h
        for i in range(num_hidden_layers):
            self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
                                            num_bases, activation=F.relu, dropout=dropout,
                                            low_mem=low_mem))
        # o2h
        self.layers.append(RelGraphConv(n_hidden, num_classes, num_rels, "basis",
                                        num_bases, activation=None, low_mem=low_mem))

    def forward(self, g, h, r, norm):
        for layer in self.layers:
            h = layer(g, h, r, norm)
        return h


def evaluate(model, g, feats, edge_type, edge_norm, labels, idx):
    model.eval()
    with torch.no_grad():
        logits = model(g, feats, edge_type, edge_norm)
        logits = logits[idx]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
    return correct.item() * 1.0 / len(labels) * 100


@utils.benchmark('acc')
@utils.parametrize('data', ['aifb', 'mutag'])
@utils.parametrize('lowmem', [True, False])
@utils.parametrize('use_type_count', [True, False])
def track_acc(data, lowmem, use_type_count):
    # args
    if data == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif data == 'mutag':
        num_bases = 30
        l2norm = 5e-4
    elif data == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError("Unsupported dataset: {}".format(data))

    data = utils.process_data(data)
    device = utils.get_bench_device()

    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    num_classes = data.num_classes
    train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
    test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
    labels = g.nodes[category].data.pop('labels').to(device)

    # compute the inverse in-degree norm for each edge type and store it on the edges
    for canonical_etype in g.canonical_etypes:
        u, v, eid = g.all_edges(form='all', etype=canonical_etype)
        _, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
        degrees = count[inverse_index]
        norm = 1. / degrees.float()
        norm = norm.unsqueeze(1)
        g.edges[canonical_etype].data['norm'] = norm

    # get target category id
    category_id = len(g.ntypes)
    for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i

    if use_type_count:
        g, _, edge_type = dgl.to_homogeneous(g, edata=['norm'], return_count=True)
        g = g.to(device)
    else:
        g = dgl.to_homogeneous(g, edata=['norm']).to(device)
        edge_type = g.edata.pop(dgl.ETYPE).long()

    num_nodes = g.number_of_nodes()
    edge_norm = g.edata['norm']

    # find out the target node ids in g
    target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
    train_idx = target_idx[train_mask]
    test_idx = target_idx[test_mask]
    train_labels = labels[train_mask]
    test_labels = labels[test_mask]

    # since the nodes are featureless, the input feature is then the node id.
    feats = torch.arange(num_nodes, device=device)
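    # (Note: RelGraphConv accepts a 1-D integer tensor as input and treats it
    # as node IDs, so arange(num_nodes) behaves like one-hot input features.)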
    # create model
    model = RGCN(num_nodes,
                 16,
                 num_classes,
                 num_rels,
                 num_bases,
                 0,
                 0,
                 lowmem).to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=l2norm)

    model.train()
    for epoch in range(30):
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    acc = evaluate(model, g, feats, edge_type, edge_norm, test_labels, test_idx)
    return acc
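
Aside: the `use_type_count` branch above leans on `dgl.to_homogeneous` returning per-type counts. A minimal sketch of that call on a made-up two-relation heterograph:

```python
import torch
import dgl

# Toy heterograph with two edge types (made up for illustration).
hg = dgl.heterograph({
    ('user', 'follows', 'user'): (torch.tensor([0, 1]), torch.tensor([1, 2])),
    ('user', 'plays', 'game'): (torch.tensor([0, 2]), torch.tensor([0, 1])),
})

# Flatten into a single homogeneous graph. With return_count=True, per-type
# node and edge counts come back alongside the graph; edges are grouped by
# type, so the counts can replace a per-edge g.edata[dgl.ETYPE] lookup.
g, ntype_count, etype_count = dgl.to_homogeneous(hg, return_count=True)
print(etype_count)           # e.g. [2, 2]: number of edges per edge type
print(g.edata[dgl.ETYPE])    # per-edge type IDs are still stored on the graph
```
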
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy

from .. import utils
from .. import rgcn


@utils.benchmark('acc', timeout=1200)
@utils.parametrize('dataset', ['aifb', 'mutag'])
@utils.parametrize('ns_mode', [False])
def track_acc(dataset, ns_mode):
    g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = rgcn.load_data(
        dataset, get_norm=True)

    num_hidden = 16
    if dataset == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif dataset == 'mutag':
        num_bases = 30
        l2norm = 5e-4
    elif dataset == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError("Unsupported dataset: {}".format(dataset))

    model = rgcn.RGCN(g.num_nodes(),
                      num_hidden,
                      num_classes,
                      num_rels,
                      num_bases=num_bases,
                      ns_mode=ns_mode)
    device = utils.get_bench_device()
    labels = labels.to(device)
    model = model.to(device)
    g = g.int().to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=l2norm)

    model.train()
    for epoch in range(30):
        logits = model(g)
        logits = logits[target_idx]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    model.eval()
    with torch.no_grad():
        logits = model(g)
        logits = logits[target_idx]
        test_acc = accuracy(logits[test_idx].argmax(
            dim=1), labels[test_idx]).item()
    return test_acc
@@ -34,9 +34,6 @@ class EntityClassify(nn.Module):
         Dropout
     use_self_loop : bool
         Use self loop if True, default False.
-    low_mem : bool
-        True to use low memory implementation of relation message passing function
-        trade speed with memory consumption
     """
     def __init__(self,
                  device,
@@ -48,7 +45,6 @@ class EntityClassify(nn.Module):
                  num_hidden_layers=1,
                  dropout=0,
                  use_self_loop=False,
-                 low_mem=False,
                  layer_norm=False):
         super(EntityClassify, self).__init__()
         self.device = device
@@ -60,7 +56,6 @@ class EntityClassify(nn.Module):
         self.num_hidden_layers = num_hidden_layers
         self.dropout = dropout
         self.use_self_loop = use_self_loop
-        self.low_mem = low_mem
         self.layer_norm = layer_norm
 
         self.layers = nn.ModuleList()
@@ -68,19 +63,19 @@ class EntityClassify(nn.Module):
         self.layers.append(RelGraphConv(
             self.h_dim, self.h_dim, self.num_rels, "basis",
             self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-            low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm))
+            dropout=self.dropout, layer_norm = layer_norm))
         # h2h
         for idx in range(self.num_hidden_layers):
             self.layers.append(RelGraphConv(
                 self.h_dim, self.h_dim, self.num_rels, "basis",
                 self.num_bases, activation=F.relu, self_loop=self.use_self_loop,
-                low_mem=self.low_mem, dropout=self.dropout, layer_norm = layer_norm))
+                dropout=self.dropout, layer_norm = layer_norm))
         # h2o
         self.layers.append(RelGraphConv(
             self.h_dim, self.out_dim, self.num_rels, "basis",
             self.num_bases, activation=None,
             self_loop=self.use_self_loop,
-            low_mem=self.low_mem, layer_norm = layer_norm))
+            layer_norm = layer_norm))
 
     def forward(self, blocks, feats, norm=None):
         if blocks is None:
@@ -195,8 +190,7 @@ def evaluate(model, embed_layer, eval_loader, node_feats):
     return eval_logits, eval_seeds
 
-@utils.benchmark('time', 3600)  # ogbn-mag takes ~1 hour to train
+@utils.benchmark('acc', timeout=3600)  # ogbn-mag takes ~1 hour to train
 @utils.parametrize('data', ['am', 'ogbn-mag'])
 def track_acc(data):
     dataset = utils.process_data(data)
@@ -220,7 +214,6 @@ def track_acc(data):
     dropout = 0.5
     use_self_loop = True
     lr = 0.01
-    low_mem = True
     num_workers = 4
 
     hg = dataset[0]
@@ -306,7 +299,6 @@ def track_acc(data):
         num_hidden_layers=n_layers - 2,
         dropout=dropout,
         use_self_loop=use_self_loop,
-        low_mem=low_mem,
         layer_norm=False)
 
     embed_layer = embed_layer.to(device)
import time

import numpy as np
import dgl
from dgl.nn.pytorch import RelGraphConv
import torch
import torch.nn as nn
import torch.nn.functional as F

from .. import utils


class RGCN(nn.Module):
    def __init__(self,
                 num_nodes,
                 n_hidden,
                 num_classes,
                 num_rels,
                 num_bases,
                 num_hidden_layers,
                 dropout):
        super(RGCN, self).__init__()
        self.layers = nn.ModuleList()
        # i2h
        self.layers.append(RelGraphConv(num_nodes, n_hidden, num_rels, "basis",
                                        num_bases, activation=F.relu, dropout=dropout))
        # h2h
        for i in range(num_hidden_layers):
            self.layers.append(RelGraphConv(n_hidden, n_hidden, num_rels, "basis",
                                            num_bases, activation=F.relu, dropout=dropout))
        # o2h
        self.layers.append(RelGraphConv(n_hidden, num_classes, num_rels, "basis",
                                        num_bases, activation=None))

    def forward(self, g, h, r, norm):
        for layer in self.layers:
            h = layer(g, h, r, norm)
        return h


@utils.benchmark('time', 300)
@utils.parametrize('data', ['aifb'])
@utils.parametrize('use_type_count', [True, False])
def track_time(data, use_type_count):
    # args
    if data == 'aifb':
        if dgl.__version__.startswith("0.8"):
            num_bases = None
        else:
            num_bases = -1
        l2norm = 0.
    elif data == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError("Unsupported dataset: {}".format(data))

    data = utils.process_data(data)
    device = utils.get_bench_device()
    num_epochs = 30

    g = data[0]
    num_rels = len(g.canonical_etypes)
    category = data.predict_category
    num_classes = data.num_classes
    train_mask = g.nodes[category].data.pop('train_mask').bool().to(device)
    test_mask = g.nodes[category].data.pop('test_mask').bool().to(device)
    labels = g.nodes[category].data.pop('labels').to(device)

    # compute the inverse in-degree norm for each edge type and store it on the edges
    for canonical_etype in g.canonical_etypes:
        u, v, eid = g.all_edges(form='all', etype=canonical_etype)
        _, inverse_index, count = torch.unique(v, return_inverse=True, return_counts=True)
        degrees = count[inverse_index]
        norm = 1. / degrees.float()
        norm = norm.unsqueeze(1)
        g.edges[canonical_etype].data['norm'] = norm

    # get target category id
    category_id = len(g.ntypes)
    for i, ntype in enumerate(g.ntypes):
        if ntype == category:
            category_id = i

    if use_type_count:
        g, _, edge_type = dgl.to_homogeneous(g, edata=['norm'], return_count=True)
        g = g.to(device)
    else:
        g = dgl.to_homogeneous(g, edata=['norm']).to(device)
        edge_type = g.edata.pop(dgl.ETYPE).long()

    num_nodes = g.number_of_nodes()
    edge_norm = g.edata['norm']

    # find out the target node ids in g
    target_idx = torch.where(g.ndata[dgl.NTYPE] == category_id)[0]
    train_idx = target_idx[train_mask]
    test_idx = target_idx[test_mask]
    train_labels = labels[train_mask]
    test_labels = labels[test_mask]

    # since the nodes are featureless, the input feature is then the node id.
    feats = torch.arange(num_nodes, device=device)

    # create model
    model = RGCN(num_nodes,
                 16,
                 num_classes,
                 num_rels,
                 num_bases,
                 0,
                 0).to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=1e-2,
                                 weight_decay=l2norm)

    model.train()
    t0 = time.time()
    for epoch in range(num_epochs):
        logits = model(g, feats, edge_type, edge_norm)
        loss = F.cross_entropy(logits[train_idx], train_labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    t1 = time.time()

    return (t1 - t0) / num_epochs
import time

import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy

from .. import utils
from .. import rgcn


@utils.benchmark('time', 1200)
@utils.parametrize('data', ['aifb', 'am'])
def track_time(data):
    # args
    if data == 'aifb':
        num_bases = -1
        l2norm = 0.
    elif data == 'am':
        num_bases = 40
        l2norm = 5e-4
    else:
        raise ValueError("Unsupported dataset: {}".format(data))

    g, num_rels, num_classes, labels, train_idx, test_idx, target_idx = rgcn.load_data(
        data, get_norm=True)

    num_hidden = 16
    model = rgcn.RGCN(g.num_nodes(),
                      num_hidden,
                      num_classes,
                      num_rels,
                      num_bases=num_bases)
    device = utils.get_bench_device()
    labels = labels.to(device)
    model = model.to(device)
    g = g.int().to(device)
    optimizer = torch.optim.Adam(
        model.parameters(), lr=1e-2, weight_decay=l2norm)

    model.train()
    num_epochs = 30
    t0 = time.time()
    for epoch in range(num_epochs):
        logits = model(g)
        logits = logits[target_idx]
        loss = F.cross_entropy(logits[train_idx], labels[train_idx])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    t1 = time.time()

    return (t1 - t0) / num_epochs
@@ -228,15 +228,12 @@ class EntityClassify(nn.Module):
         return h
 
 @utils.benchmark('time', 600)
-@utils.parametrize('data', ['am', 'ogbn-mag'])
+@utils.parametrize('data', ['ogbn-mag'])
 def track_time(data):
     dataset = utils.process_data(data)
     device = utils.get_bench_device()
 
-    if data == 'am':
-        n_bases = 40
-        l2norm = 5e-4
-    elif data == 'ogbn-mag':
+    if data == 'ogbn-mag':
         n_bases = 2
         l2norm = 0
     else: