"vscode:/vscode.git/clone" did not exist on "b0308c8525974450d623053d911833adb36c34d8"
Unverified commit a208e886 authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4680)



* [Misc] Black auto fix.

* fix pylint disable
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 29434e65
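
For context, a minimal sketch of the kind of mechanical rewrite Black applies in this commit, shown on a hypothetical toy function (not taken from the diff; assumes a line-length limit of roughly 79 columns): string quotes are normalized to double quotes, and signatures or calls that overflow the limit are wrapped one argument per line with a trailing comma.

# Before Black (hypothetical example; single quotes, one long signature line):
#
#   def make_matrix(data, index, shape, force_format=False, backend='pytorch', device='cpu'):
#       raise TypeError('Only COO format is supported. But got %s.' % shape)
#
# After Black (double quotes; parameters wrapped one per line, trailing comma added):
def make_matrix(
    data,
    index,
    shape,
    force_format=False,
    backend="pytorch",
    device="cpu",
):
    raise TypeError("Only COO format is supported. But got %s." % shape)

The second commit-message item, "fix pylint disable", suggests some "# pylint: disable" comments had to be adjusted after the reformatting moved code around; the exact changes appear in the diff below.
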
from __future__ import absolute_import
import builtins
import numbers
from distutils.version import LooseVersion
import scipy # Weird bug in new pytorch when import scipy after import torch
import numpy as np
import scipy # Weird bug in new pytorch when import scipy after import torch
import torch as th
import builtins
import numbers
from torch.utils import dlpack
from ... import ndarray as nd
......@@ -16,24 +16,33 @@ from ...function.base import TargetCode
if LooseVersion(th.__version__) < LooseVersion("1.9.0"):
raise RuntimeError("DGL requires PyTorch >= 1.9.0")
def data_type_dict():
return {'float16' : th.float16,
'float32' : th.float32,
'float64' : th.float64,
'uint8' : th.uint8,
'int8' : th.int8,
'int16' : th.int16,
'int32' : th.int32,
'int64' : th.int64,
'bool' : th.bool}
return {
"float16": th.float16,
"float32": th.float32,
"float64": th.float64,
"uint8": th.uint8,
"int8": th.int8,
"int16": th.int16,
"int32": th.int32,
"int64": th.int64,
"bool": th.bool,
}
def cpu():
return th.device('cpu')
return th.device("cpu")
def tensor(data, dtype=None):
if isinstance(data, numbers.Number):
data = [data]
if isinstance(data, list) and len(data) > 0 and isinstance(data[0], th.Tensor):
if (
isinstance(data, list)
and len(data) > 0
and isinstance(data[0], th.Tensor)
):
# prevent GPU->CPU->GPU copies
if data[0].ndim == 0:
# zero dimension scalar tensors
......@@ -43,9 +52,11 @@ def tensor(data, dtype=None):
else:
return th.as_tensor(data, dtype=dtype)
def as_scalar(data):
return data.item()
def get_preferred_sparse_format():
"""Get the preferred sparse matrix format supported by the backend.
......@@ -54,187 +65,241 @@ def get_preferred_sparse_format():
"""
return "coo"
def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0]
if fmt != 'coo':
raise TypeError('Pytorch backend only supports COO format. But got %s.' % fmt)
if fmt != "coo":
raise TypeError(
"Pytorch backend only supports COO format. But got %s." % fmt
)
spmat = th.sparse_coo_tensor(index[1], data, shape)
return spmat, None
def sparse_matrix_indices(spmat):
return ('coo', spmat._indices())
return ("coo", spmat._indices())
def is_tensor(obj):
return isinstance(obj, th.Tensor)
def shape(input):
return input.shape
def dtype(input):
return input.dtype
def ndim(input):
return input.dim()
def context(input):
return input.device
def device_type(ctx):
return th.device(ctx).type
def device_id(ctx):
ctx = th.device(ctx)
if ctx.index is None:
return 0 if ctx.type == 'cpu' else th.cuda.current_device()
return 0 if ctx.type == "cpu" else th.cuda.current_device()
else:
return ctx.index
def to_backend_ctx(dglctx):
dev_type = dglctx.device_type
if dev_type == 1:
return th.device('cpu')
return th.device("cpu")
elif dev_type == 2:
return th.device('cuda', dglctx.device_id)
return th.device("cuda", dglctx.device_id)
else:
raise ValueError('Unsupported DGL device context:', dglctx)
raise ValueError("Unsupported DGL device context:", dglctx)
def astype(input, ty):
return input.type(ty)
def asnumpy(input):
if isinstance(input, th.sparse.FloatTensor):
return input.to_dense().cpu().detach().numpy()
else:
return input.cpu().detach().numpy()
def copy_to(input, ctx, **kwargs):
ctx = th.device(ctx)
if ctx.type == 'cpu':
if ctx.type == "cpu":
return input.cpu()
elif ctx.type == 'cuda':
elif ctx.type == "cuda":
if ctx.index is not None:
th.cuda.set_device(ctx.index)
return input.cuda(**kwargs)
else:
raise RuntimeError('Invalid context', ctx)
raise RuntimeError("Invalid context", ctx)
def is_pinned(input):
return input.is_pinned()
def sum(input, dim, keepdims=False):
return th.sum(input, dim=dim, keepdim=keepdims)
def floor_div(in1, in2):
return in1 // in2
def reduce_sum(input):
return input.sum()
def cumsum(input, dim):
return th.cumsum(input, dim=dim)
def mean(input, dim):
return th.mean(input, dim=dim)
def reduce_mean(input):
return input.mean()
def max(input, dim):
# NOTE: the second argmax array is not returned
return th.max(input, dim=dim)[0]
def reduce_max(input):
return input.max()
def min(input, dim):
# NOTE: the second argmin array is not returned
return th.min(input, dim=dim)[0]
def reduce_min(input):
return input.min()
def argsort(input, dim, descending):
return th.argsort(input, dim=dim, descending=descending)
def topk(input, k, dim, descending=True):
return th.topk(input, k, dim, largest=descending)[0]
def argtopk(input, k, dim, descending=True):
return th.topk(input, k, dim, largest=descending)[1]
def exp(input):
return th.exp(input)
def inverse(input):
return th.inverse(input)
def sqrt(input):
return th.sqrt(input)
def softmax(input, dim=-1):
return th.softmax(input, dim=dim)
def cat(seq, dim):
return th.cat(seq, dim=dim)
def stack(seq, dim):
return th.stack(seq, dim=dim)
def split(input, sizes_or_sections, dim):
return th.split(input, sizes_or_sections, dim)
def repeat(input, repeats, dim):
return th.repeat_interleave(input, repeats, dim) # PyTorch 1.1
return th.repeat_interleave(input, repeats, dim) # PyTorch 1.1
def gather_row(data, row_index):
return th.index_select(data, 0, row_index.long())
def slice_axis(data, axis, begin, end):
return th.narrow(data, axis, begin, end - begin)
def take(data, indices, dim):
new_shape = data.shape[:dim] + indices.shape + data.shape[dim+1:]
new_shape = data.shape[:dim] + indices.shape + data.shape[dim + 1 :]
return th.index_select(data, dim, indices.view(-1)).view(new_shape)
def narrow_row(x, start, stop):
return x[start:stop]
def index_add_inplace(data, row_idx, value):
data.index_add_(0, row_idx, value)
def scatter_row(data, row_index, value):
return data.index_copy(0, row_index.long(), value)
def scatter_row_inplace(data, row_index, value):
data[row_index.long()] = value
def squeeze(input, dim):
return th.squeeze(input, dim)
def unsqueeze(input, dim):
return th.unsqueeze(input, dim)
def reshape(input, shape):
return th.reshape(input ,shape)
return th.reshape(input, shape)
def swapaxes(input, axis1, axis2):
return th.transpose(input, axis1, axis2)
def zeros(shape, dtype, ctx):
return th.zeros(shape, dtype=dtype, device=ctx)
def zeros_like(input):
return th.zeros_like(input)
def ones(shape, dtype, ctx):
return th.ones(shape, dtype=dtype, device=ctx)
def uniform(shape, dtype, ctx, low, high):
return th.empty(shape, dtype=dtype, device=ctx).uniform_(low, high)
def randint(shape, dtype, ctx, low, high):
return th.randint(low, high, shape, dtype=dtype, device=ctx)
def pad_packed_tensor(input, lengths, value, l_min=None):
old_shape = input.shape
device = input.device
......@@ -252,11 +317,12 @@ def pad_packed_tensor(input, lengths, value, l_min=None):
x.fill_(value)
index = th.ones(len(input), dtype=th.int64, device=device)
cum_lengths = th.cumsum(lengths, 0)
index[cum_lengths[:-1]] += (max_len - lengths[:-1])
index[cum_lengths[:-1]] += max_len - lengths[:-1]
index = th.cumsum(index, 0) - 1
x[index] = input
return x.view(batch_size, max_len, *old_shape[1:])
def pack_padded_tensor(input, lengths):
max_len = input.shape[1]
device = input.device
......@@ -268,222 +334,377 @@ def pack_padded_tensor(input, lengths):
out_len = lengths.sum().item()
index = th.ones(out_len, dtype=th.int64, device=device)
cum_lengths = th.cumsum(lengths, 0)
index[cum_lengths[:-1]] += (max_len - lengths[:-1])
index[cum_lengths[:-1]] += max_len - lengths[:-1]
index = th.cumsum(index, 0) - 1
return input[index]
def boolean_mask(input, mask):
if 'bool' not in str(mask.dtype):
if "bool" not in str(mask.dtype):
mask = th.tensor(mask, dtype=th.bool)
return input[mask]
def equal(x, y):
return x == y
def allclose(x, y, rtol=1e-4, atol=1e-4):
return th.allclose(x, y, rtol=rtol, atol=atol)
def logical_not(input):
return ~input
def logical_and(input1, input2):
return input1 & input2
def clone(input):
return input.clone()
def clamp(data, min_val, max_val):
return th.clamp(data, min_val, max_val)
def replace_inf_with_zero(x):
return th.masked_fill(x, th.isinf(x), 0)
def count_nonzero(input):
# TODO: fallback to numpy for backward compatibility
return np.count_nonzero(input)
def unique(input, return_inverse=False, return_counts=False):
if input.dtype == th.bool:
input = input.type(th.int8)
return th.unique(input, return_inverse=return_inverse, return_counts=return_counts)
return th.unique(
input, return_inverse=return_inverse, return_counts=return_counts
)
def full_1d(length, fill_value, dtype, ctx):
return th.full((length,), fill_value, dtype=dtype, device=ctx)
def nonzero_1d(input):
x = th.nonzero(input, as_tuple=False).squeeze()
return x if x.dim() == 1 else x.view(-1)
def sort_1d(input):
return th.sort(input)
def arange(start, stop, dtype=th.int64, ctx=None):
return th.arange(start, stop, dtype=dtype, device=ctx)
def rand_shuffle(arr):
idx = th.randperm(len(arr))
return arr[idx]
def zerocopy_to_dlpack(input):
return dlpack.to_dlpack(input.contiguous())
def zerocopy_from_dlpack(dlpack_tensor):
return dlpack.from_dlpack(dlpack_tensor)
def zerocopy_to_numpy(input):
# NOTE: not zerocopy
return asnumpy(input)
def zerocopy_from_numpy(np_array):
return th.as_tensor(np_array)
if LooseVersion(th.__version__) >= LooseVersion("1.10.0"):
def zerocopy_to_dgl_ndarray(data):
if data.dtype == th.bool:
data = data.byte()
return nd.from_dlpack(dlpack.to_dlpack(data.contiguous()))
else:
def zerocopy_to_dgl_ndarray(data):
return nd.from_dlpack(dlpack.to_dlpack(data.contiguous()))
def zerocopy_to_dgl_ndarray_for_write(input):
assert input.is_contiguous(), "Cannot convert non-contiguous tensors " \
assert input.is_contiguous(), (
"Cannot convert non-contiguous tensors "
"to dgl ndarray for write. Call .to_contiguous() first."
assert input.numel() == input.storage().size(), "Cannot convert view " \
"tensors to dgl ndarray for write."
)
assert input.numel() == input.storage().size(), (
"Cannot convert view " "tensors to dgl ndarray for write."
)
return zerocopy_to_dgl_ndarray(input)
def zerocopy_from_dgl_ndarray(data):
if data.shape == (0,):
# NOTE: PyTorch v1.5 does not accept DLPack object representing empty CUDA tensor.
# Related issue: https://github.com/pytorch/pytorch/issues/41182
# The issue will be fixed in v1.6 and later.
return th.tensor([], dtype=getattr(th, data.dtype),
device=to_backend_ctx(data.ctx))
return th.tensor(
[], dtype=getattr(th, data.dtype), device=to_backend_ctx(data.ctx)
)
elif len(data.shape) == 0 or builtins.min(data.shape) == 0:
# Workaround the same issue as above, but preserve the shape of the
# empty tensor. This is needed by the sparse optimizer when one of
# processors may receive no gradients to update, but we want to keep
# the dimension of the embedding.
return th.empty(data.shape, dtype=getattr(th, data.dtype),
device=to_backend_ctx(data.ctx))
return th.empty(
data.shape,
dtype=getattr(th, data.dtype),
device=to_backend_ctx(data.ctx),
)
else:
return dlpack.from_dlpack(data.to_dlpack())
class BinaryReduce(th.autograd.Function):
@staticmethod
def forward(ctx, reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data, out_data,
out_size, lhs_map, rhs_map, out_map):
def forward(
ctx,
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_data,
out_size,
lhs_map,
rhs_map,
out_map,
):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(binary_op, lhs_data_nd, rhs_data_nd)
feat_shape = K.infer_binary_feature_shape(
binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if binary_op == 'dot':
if binary_op == "dot":
out_shape = feat_shape[:-1]
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.binary_op_reduce(
reducer if reducer != 'mean' else 'sum',
binary_op, graph, lhs, rhs, lhs_data_nd, rhs_data_nd,
out_data_nd, lhs_map[0], rhs_map[0], out_map[0])
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
lhs_map[0],
rhs_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have better solution in the future.
if reducer == 'mean':
if reducer == "mean":
degs = lhs_data.new_empty((out_data.shape[0],))
degs_nd = zerocopy_to_dgl_ndarray(degs)
if lhs != TargetCode.DST: # src or edge
if lhs != TargetCode.DST: # src or edge
target = lhs
n = lhs_data.shape[0]
in_map = lhs_map[0]
else: # rhs != TargetCode.DST
else: # rhs != TargetCode.DST
target = rhs
n = rhs_data.shape[0]
in_map = rhs_map[0]
in_ones = lhs_data.new_ones((n,))
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
K.copy_reduce(
'sum', graph, target, in_ones_nd, degs_nd, in_map, out_map[0])
"sum", graph, target, in_ones_nd, degs_nd, in_map, out_map[0]
)
# reshape
degs = degs.reshape((out_data.shape[0],) + (1,) * (out_data.dim() - 1)).clamp(min=1)
degs = degs.reshape(
(out_data.shape[0],) + (1,) * (out_data.dim() - 1)
).clamp(min=1)
out_data = out_data / degs
else:
degs = None
# save_for_backward can only save variables
ctx.backward_cache = (reducer, binary_op, graph, lhs, rhs, lhs_map,
rhs_map, out_map, feat_shape, degs)
ctx.backward_cache = (
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_map,
rhs_map,
out_map,
feat_shape,
degs,
)
ctx.save_for_backward(lhs_data, rhs_data, out_data)
return out_data
@staticmethod
def backward(ctx, grad_out):
reducer, binary_op, graph, lhs, rhs, lhs_map, rhs_map, out_map, \
feat_shape, degs = ctx.backward_cache
(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_map,
rhs_map,
out_map,
feat_shape,
degs,
) = ctx.backward_cache
lhs_data, rhs_data, out_data = ctx.saved_tensors
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
grad_lhs = None
grad_rhs = None
if reducer == 'mean':
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
if ctx.needs_input_grad[5]:
grad_lhs = grad_out.new_empty((lhs_data_nd.shape[0],) + feat_shape)
K.backward_lhs_binary_op_reduce(
reducer if reducer != 'mean' else 'sum',
binary_op, graph, lhs, rhs, lhs_data_nd, rhs_data_nd,
out_data_nd, grad_out_nd, zerocopy_to_dgl_ndarray(grad_lhs),
lhs_map[1], rhs_map[1], out_map[1])
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_lhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_lhs = _reduce_grad(grad_lhs, lhs_data_nd.shape)
if ctx.needs_input_grad[6]:
grad_rhs = grad_out.new_empty((rhs_data_nd.shape[0],) + feat_shape)
K.backward_rhs_binary_op_reduce(
reducer if reducer != 'mean' else 'sum',
binary_op, graph, lhs, rhs, lhs_data_nd, rhs_data_nd,
out_data_nd, grad_out_nd, zerocopy_to_dgl_ndarray(grad_rhs),
lhs_map[1], rhs_map[1], out_map[1])
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_rhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_rhs = _reduce_grad(grad_rhs, rhs_data_nd.shape)
return None, None, None, None, None, grad_lhs, grad_rhs, None, None, None, \
None, None
def binary_reduce(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
out_size, lhs_map=(None, None), rhs_map=(None, None), out_map=(None, None)):
return (
None,
None,
None,
None,
None,
grad_lhs,
grad_rhs,
None,
None,
None,
None,
None,
)
def binary_reduce(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map=(None, None),
rhs_map=(None, None),
out_map=(None, None),
):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(binary_op, lhs_data_nd, rhs_data_nd)
feat_shape = K.infer_binary_feature_shape(
binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if binary_op == 'dot':
if binary_op == "dot":
out_shape = feat_shape[:-1]
out_data = lhs_data.new_empty((out_size,) + out_shape)
return BinaryReduce.apply(
reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data, out_data,
out_size, lhs_map, rhs_map, out_map)
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_data,
out_size,
lhs_map,
rhs_map,
out_map,
)
class CopyReduce(th.autograd.Function):
@staticmethod
def forward(ctx, reducer, graph, target, in_data, out_data, out_size, in_map,
out_map):
def forward(
ctx,
reducer,
graph,
target,
in_data,
out_data,
out_size,
in_map,
out_map,
):
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.copy_reduce(
reducer if reducer != 'mean' else 'sum',
graph, target, in_data_nd, out_data_nd, in_map[0], out_map[0])
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
in_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have better solution in the future.
if reducer == 'mean':
if reducer == "mean":
in_ones = in_data.new_ones((in_data.shape[0],))
degs = in_data.new_empty((out_data.shape[0],))
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
degs_nd = zerocopy_to_dgl_ndarray(degs)
K.copy_reduce(
'sum', graph, target, in_ones_nd, degs_nd, in_map[0], out_map[0])
"sum", graph, target, in_ones_nd, degs_nd, in_map[0], out_map[0]
)
# reshape
degs = degs.reshape((out_data.shape[0],) + (1,) * (out_data.dim() - 1)).clamp(min=1)
degs = degs.reshape(
(out_data.shape[0],) + (1,) * (out_data.dim() - 1)
).clamp(min=1)
out_data = out_data / degs
else:
degs = None
......@@ -499,22 +720,38 @@ class CopyReduce(th.autograd.Function):
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
grad_in = None
if reducer == 'mean':
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
if ctx.needs_input_grad[3]:
grad_in = grad_out.new_empty(in_data_nd.shape)
K.backward_copy_reduce(
reducer if reducer != 'mean' else 'sum',
graph, target, in_data_nd, out_data_nd, grad_out_nd,
zerocopy_to_dgl_ndarray(grad_in), in_map[1], out_map[1])
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_in),
in_map[1],
out_map[1],
)
return None, None, None, grad_in, None, None, None, None
def copy_reduce(reducer, graph, target, in_data, out_size, in_map=(None, None),
out_map=(None, None)):
def copy_reduce(
reducer,
graph,
target,
in_data,
out_size,
in_map=(None, None),
out_map=(None, None),
):
out_data = in_data.new_empty((out_size,) + in_data.shape[1:])
return CopyReduce.apply(reducer, graph, target, in_data, out_data, out_size, in_map, out_map)
return CopyReduce.apply(
reducer, graph, target, in_data, out_data, out_size, in_map, out_map
)
def _reduce_grad(grad, shape):
......@@ -543,15 +780,19 @@ def _reduce_grad(grad, shape):
num_to_squeeze = len(grad_shape) - len(in_shape)
# pad inshape
in_shape = (1,) * num_to_squeeze + in_shape
reduce_idx = th.nonzero(th.tensor(grad_shape) - th.tensor(in_shape), as_tuple=False)
reduce_idx = th.nonzero(
th.tensor(grad_shape) - th.tensor(in_shape), as_tuple=False
)
reduce_idx += 1 # skip batch dim
grad = grad.sum(dim=tuple(reduce_idx), keepdim=True)
return grad.view(shape)
def sync():
# Pytorch performs computation synchronously, so no need for synchronization.
pass
def attach_grad(x):
if x.grad is not None:
x.grad.zero_()
......@@ -559,21 +800,30 @@ def attach_grad(x):
else:
return x.requires_grad_()
def backward(x, head_gradient=None):
if head_gradient is not None and head_gradient.shape[0] == 1 and len(head_gradient.shape) == 1:
if (
head_gradient is not None
and head_gradient.shape[0] == 1
and len(head_gradient.shape) == 1
):
# Fix for torch 1.3.1
head_gradient = th.tensor(head_gradient.item()).to(head_gradient.device)
x.backward(head_gradient)
def grad(x):
return x.grad
def is_no_grad(x):
return x.grad is None or (x.grad == 0).all()
def is_recording():
return th.is_grad_enabled()
class record_grad(object):
def __init__(self):
pass
......@@ -584,4 +834,5 @@ class record_grad(object):
def __exit__(self, exc_type, exc_value, exc_traceback):
pass
no_grad = th.no_grad
import argparse
import os
import json
import os
def set_default_backend(default_dir, backend_name):
os.makedirs(default_dir, exist_ok=True)
config_path = os.path.join(default_dir, 'config.json')
with open(config_path, "w") as config_file:
json.dump({'backend': backend_name.lower()}, config_file)
print('Setting the default backend to "{}". You can change it in the '
'~/.dgl/config.json file or export the DGLBACKEND environment variable. '
'Valid options are: pytorch, mxnet, tensorflow (all lowercase)'.format(
backend_name))
config_path = os.path.join(default_dir, "config.json")
with open(config_path, "w") as config_file:
json.dump({"backend": backend_name.lower()}, config_file)
print(
'Setting the default backend to "{}". You can change it in the '
"~/.dgl/config.json file or export the DGLBACKEND environment variable. "
"Valid options are: pytorch, mxnet, tensorflow (all lowercase)".format(
backend_name
)
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("default_dir", type=str, default=os.path.join(os.path.expanduser('~'), '.dgl'))
parser.add_argument("backend", nargs=1, type=str, choices=[
'pytorch', 'tensorflow', 'mxnet'], help="Set default backend")
parser.add_argument(
"default_dir",
type=str,
default=os.path.join(os.path.expanduser("~"), ".dgl"),
)
parser.add_argument(
"backend",
nargs=1,
type=str,
choices=["pytorch", "tensorflow", "mxnet"],
help="Set default backend",
)
args = parser.parse_args()
set_default_backend(args.default_dir, args.backend[0])
import os
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
from .tensor import *
os.environ["TF_FORCE_GPU_ALLOW_GROWTH"] = "true"
from .sparse import *
from .tensor import *
import tensorflow as tf
import numpy as np
from .tensor import tensor, copy_to, context, asnumpy, zerocopy_from_numpy
from ...base import is_all, ALL
from ...sparse import _gspmm, _gsddmm, _segment_reduce, _bwd_segment_cmp, _scatter_add
from ...sparse import _csrmm, _csrsum, _csrmask
from ...heterograph_index import create_unitgraph_from_csr
import tensorflow as tf
__all__ = ['gspmm', 'gsddmm', 'edge_softmax', 'segment_reduce', 'scatter_add',
'csrmm', 'csrsum', 'csrmask']
from ...base import ALL, is_all
from ...heterograph_index import create_unitgraph_from_csr
from ...sparse import (
_bwd_segment_cmp,
_csrmask,
_csrmm,
_csrsum,
_gsddmm,
_gspmm,
_scatter_add,
_segment_reduce,
)
from .tensor import asnumpy, context, copy_to, tensor, zerocopy_from_numpy
__all__ = [
"gspmm",
"gsddmm",
"edge_softmax",
"segment_reduce",
"scatter_add",
"csrmm",
"csrsum",
"csrmask",
]
def _scatter_nd(index, src, n_rows):
......@@ -21,7 +38,10 @@ def _scatter_nd(index, src, n_rows):
di = shp[i]
offset_i = tf.range(di, dtype=index.dtype)
offsets.append(
tf.reshape((stride * offset_i), (1,) * i + (di,) + (1,) * (ndim - 1 - i)))
tf.reshape(
(stride * offset_i), (1,) * i + (di,) + (1,) * (ndim - 1 - i)
)
)
stride *= di
if ndim > 1:
new_idx = index * stride + copy_to(sum(offsets), ctx)
......@@ -29,7 +49,9 @@ def _scatter_nd(index, src, n_rows):
new_idx = index
src = tf.reshape(src, (-1,))
new_idx = tf.reshape(new_idx, (-1, 1))
rst = tf.reshape(tf.scatter_nd(new_idx, src, (stride * n_rows,)), (n_rows, *shp[1:]))
rst = tf.reshape(
tf.scatter_nd(new_idx, src, (stride * n_rows,)), (n_rows, *shp[1:])
)
return rst
......@@ -43,7 +65,10 @@ def _gather_nd(index, src):
di = shp[i]
offset_i = tf.range(di, dtype=index.dtype)
offsets.append(
tf.reshape((stride * offset_i), (1,) * i + (di,) + (1,) * (ndim - 1 - i)))
tf.reshape(
(stride * offset_i), (1,) * i + (di,) + (1,) * (ndim - 1 - i)
)
)
stride *= di
if ndim > 1:
new_idx = index * stride + copy_to(sum(offsets), ctx)
......@@ -78,10 +103,13 @@ def _reduce_grad(grad, shape):
num_to_squeeze = len(grad_shape) - len(in_shape)
# pad inshape
in_shape = (1,) * num_to_squeeze + in_shape
reduce_idx = np.asarray(np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape)))
reduce_idx = np.asarray(
np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))
)
reduce_idx += 1 # skip batch dim
reduce_idx_tensor = tf.constant(tuple(
reduce_idx.flatten().tolist()), dtype=tf.int32)
reduce_idx_tensor = tf.constant(
tuple(reduce_idx.flatten().tolist()), dtype=tf.int32
)
grad = tf.reduce_sum(grad, axis=reduce_idx_tensor, keepdims=True)
return tf.reshape(grad, shape)
......@@ -96,11 +124,11 @@ def _need_reduce_last_dim(ufeat, efeat):
def _muldiv(op, x):
return 1. / x if op == 'div' else x
return 1.0 / x if op == "div" else x
def _addsub(op, x):
return -x if op == 'sub' else x
return -x if op == "sub" else x
def _expand(x, shape):
......@@ -112,49 +140,55 @@ def gspmm_real(gidx, op, reduce_op, X, Y):
def grad(dZ):
dZ = tensor(dZ)
if op != 'copy_rhs':
if op != "copy_rhs":
g_rev = gidx.reverse()
if reduce_op == 'sum':
if op in ['mul', 'div']:
dX = _gspmm(g_rev, 'mul', 'sum', dZ, _muldiv(op, Y))[0]
elif op in ['add', 'sub']:
dX = _gspmm(g_rev, 'copy_lhs', 'sum', dZ, Y)[0]
elif op == 'copy_lhs':
dX = _gspmm(g_rev, 'copy_lhs', 'sum', dZ, None)[0]
if reduce_op == "sum":
if op in ["mul", "div"]:
dX = _gspmm(g_rev, "mul", "sum", dZ, _muldiv(op, Y))[0]
elif op in ["add", "sub"]:
dX = _gspmm(g_rev, "copy_lhs", "sum", dZ, Y)[0]
elif op == "copy_lhs":
dX = _gspmm(g_rev, "copy_lhs", "sum", dZ, None)[0]
else:
if op in ['mul', 'div']:
if op in ["mul", "div"]:
dX = _scatter_nd(
argX,
_muldiv(op, _gather_nd(argY, _expand(Y, dZ.shape[1:]))) * dZ,
X.shape[0])
elif op in ['add', 'sub', 'copy_lhs']:
_muldiv(op, _gather_nd(argY, _expand(Y, dZ.shape[1:])))
* dZ,
X.shape[0],
)
elif op in ["add", "sub", "copy_lhs"]:
dX = _scatter_nd(argX, dZ, X.shape[0])
dX = _reduce_grad(dX, X.shape)
else:
dX = tf.zeros_like(X)
if op != 'copy_lhs':
if reduce_op == 'sum':
if op == 'mul' and _need_reduce_last_dim(X, Y):
dY = _gsddmm(gidx, 'dot', X, dZ)
elif op in ['mul', 'div']:
dY = _gsddmm(gidx, 'mul', X, dZ)
if op == 'div': dY = -dY / (Y ** 2)
elif op in ['add', 'sub', 'copy_rhs']:
dY = _gsddmm(gidx, 'copy_rhs', X, _addsub(op, dZ))
if op != "copy_lhs":
if reduce_op == "sum":
if op == "mul" and _need_reduce_last_dim(X, Y):
dY = _gsddmm(gidx, "dot", X, dZ)
elif op in ["mul", "div"]:
dY = _gsddmm(gidx, "mul", X, dZ)
if op == "div":
dY = -dY / (Y**2)
elif op in ["add", "sub", "copy_rhs"]:
dY = _gsddmm(gidx, "copy_rhs", X, _addsub(op, dZ))
else:
out_shp = (Y.shape[0],) + dZ.shape[1:]
if op in ['mul', 'div']:
if op in ["mul", "div"]:
dY = _scatter_nd(
argY,
_gather_nd(argX, _expand(X, dZ.shape[1:])) * dZ,
Y.shape[0])
if op == 'div': dY = -dY / (Y ** 2)
elif op in ['add', 'sub', 'copy_rhs']:
Y.shape[0],
)
if op == "div":
dY = -dY / (Y**2)
elif op in ["add", "sub", "copy_rhs"]:
dY = _scatter_nd(argY, _addsub(op, dZ), Y.shape[0])
dY = _reduce_grad(dY, Y.shape)
else:
dY = tf.zeros_like(Y)
return dX, dY
return out, grad
......@@ -162,6 +196,7 @@ def gspmm(gidx, op, reduce_op, X, Y):
@tf.custom_gradient
def _lambda(X, Y):
return gspmm_real(gidx, op, reduce_op, X, Y)
if X is None:
X = tf.zeros(())
if Y is None:
......@@ -173,58 +208,68 @@ def gsddmm_real(gidx, op, X, Y, lhs_target, rhs_target):
out = _gsddmm(gidx, op, X, Y, lhs_target, rhs_target)
def grad(dZ):
if op != 'copy_rhs':
if lhs_target in ['u', 'v']:
_gidx = gidx if lhs_target == 'v' else gidx.reverse()
if op in ['add', 'sub', 'copy_lhs']:
dX = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ)[0]
if op != "copy_rhs":
if lhs_target in ["u", "v"]:
_gidx = gidx if lhs_target == "v" else gidx.reverse()
if op in ["add", "sub", "copy_lhs"]:
dX = _gspmm(_gidx, "copy_rhs", "sum", None, dZ)[0]
else: # mul, div, dot
if rhs_target == lhs_target:
dX = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ)[0] * _muldiv(op, Y)
elif rhs_target == 'e':
dX = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ * _muldiv(op, Y))[0]
dX = _gspmm(_gidx, "copy_rhs", "sum", None, dZ)[
0
] * _muldiv(op, Y)
elif rhs_target == "e":
dX = _gspmm(
_gidx, "copy_rhs", "sum", None, dZ * _muldiv(op, Y)
)[0]
else: # rhs_target = !lhs_target
dX = _gspmm(_gidx, 'mul', 'sum', _muldiv(op, Y), dZ)[0]
dX = _gspmm(_gidx, "mul", "sum", _muldiv(op, Y), dZ)[0]
else: # lhs_target == 'e'
if op in ['add', 'sub', 'copy_lhs']:
if op in ["add", "sub", "copy_lhs"]:
dX = dZ
else: # mul, div, dot
dX = _gsddmm(gidx, 'mul', dZ, _muldiv(op, Y), 'e', rhs_target)
dX = _gsddmm(
gidx, "mul", dZ, _muldiv(op, Y), "e", rhs_target
)
dX = _reduce_grad(dX, X.shape)
else:
dX = tf.zeros_like(X)
if op != 'copy_lhs':
if rhs_target in ['u', 'v']:
_gidx = gidx if rhs_target == 'v' else gidx.reverse()
if op in ['add', 'sub', 'copy_rhs']:
dY = _gspmm(_gidx, 'copy_rhs', 'sum', None, _addsub(op, dZ))[0]
if op != "copy_lhs":
if rhs_target in ["u", "v"]:
_gidx = gidx if rhs_target == "v" else gidx.reverse()
if op in ["add", "sub", "copy_rhs"]:
dY = _gspmm(
_gidx, "copy_rhs", "sum", None, _addsub(op, dZ)
)[0]
else: # mul, div, dot
if lhs_target == rhs_target:
dY = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ)[0] * X
elif lhs_target == 'e':
dY = _gspmm(_gidx, 'copy_rhs', 'sum', None, dZ * X)[0]
dY = _gspmm(_gidx, "copy_rhs", "sum", None, dZ)[0] * X
elif lhs_target == "e":
dY = _gspmm(_gidx, "copy_rhs", "sum", None, dZ * X)[0]
else: # rhs_target = !lhs_target
dY = _gspmm(_gidx, 'mul', 'sum', X, dZ)[0]
if op == 'div':
dY = -dY / (Y ** 2)
dY = _gspmm(_gidx, "mul", "sum", X, dZ)[0]
if op == "div":
dY = -dY / (Y**2)
else:
if op in ['add', 'sub', 'copy_rhs']:
if op in ["add", "sub", "copy_rhs"]:
dY = _addsub(op, dZ)
else: # mul, div, dot
dY = _gsddmm(gidx, 'mul', dZ, X, 'e', lhs_target)
if op == 'div':
dY = -dY / (Y ** 2)
dY = _gsddmm(gidx, "mul", dZ, X, "e", lhs_target)
if op == "div":
dY = -dY / (Y**2)
dY = _reduce_grad(dY, Y.shape)
else:
dY = tf.zeros_like(Y)
return dX, dY
return out, grad
def gsddmm(gidx, op, X, Y, lhs_target='u', rhs_target='v'):
def gsddmm(gidx, op, X, Y, lhs_target="u", rhs_target="v"):
@tf.custom_gradient
def _lambda(X, Y):
return gsddmm_real(gidx, op, X, Y, lhs_target, rhs_target)
if X is None:
X = tf.zeros(())
if Y is None:
......@@ -232,29 +277,30 @@ def gsddmm(gidx, op, X, Y, lhs_target='u', rhs_target='v'):
return _lambda(X, Y)
def edge_softmax_real(gidx, score, eids=ALL, norm_by='dst'):
def edge_softmax_real(gidx, score, eids=ALL, norm_by="dst"):
if not is_all(eids):
gidx = gidx.edge_subgraph([eids], True).graph
if norm_by == 'src':
if norm_by == "src":
gidx = gidx.reverse()
score_max = _gspmm(gidx, 'copy_rhs', 'max', None, score)[0]
score = tf.math.exp(_gsddmm(gidx, 'sub', score, score_max, 'e', 'v'))
score_sum = _gspmm(gidx, 'copy_rhs', 'sum', None, score)[0]
out = _gsddmm(gidx, 'div', score, score_sum, 'e', 'v')
score_max = _gspmm(gidx, "copy_rhs", "max", None, score)[0]
score = tf.math.exp(_gsddmm(gidx, "sub", score, score_max, "e", "v"))
score_sum = _gspmm(gidx, "copy_rhs", "sum", None, score)[0]
out = _gsddmm(gidx, "div", score, score_sum, "e", "v")
def edge_softmax_backward(grad_out):
sds = out * grad_out
accum = gspmm(gidx, 'copy_rhs', 'sum', None, sds)
grad_score = sds - gsddmm(gidx, 'mul', out, accum, 'e', 'v')
accum = gspmm(gidx, "copy_rhs", "sum", None, sds)
grad_score = sds - gsddmm(gidx, "mul", out, accum, "e", "v")
return grad_score
return out, edge_softmax_backward
def edge_softmax(gidx, logits, eids=ALL, norm_by='dst'):
def edge_softmax(gidx, logits, eids=ALL, norm_by="dst"):
@tf.custom_gradient
def _lambda(logits):
return edge_softmax_real(gidx, logits, eids, norm_by)
return _lambda(logits)
......@@ -263,7 +309,7 @@ def segment_reduce_real(op, x, offsets):
def segment_reduce_backward(dy):
m = x.shape[0]
if op == 'sum':
if op == "sum":
offsets_np = asnumpy(offsets[1:])
indices_np = np.zeros((m + 1,), dtype=offsets_np.dtype)
np.add.at(indices_np, offsets_np, np.ones_like(offsets_np))
......@@ -281,6 +327,7 @@ def segment_reduce(op, x, offsets):
@tf.custom_gradient
def _lambda(x):
return segment_reduce_real(op, x, offsets)
return _lambda(x)
......@@ -289,7 +336,7 @@ def scatter_add_real(x, idx, m):
def scatter_add_backward(dy):
return tf.gather(dy, idx)
return y, scatter_add_backward
......@@ -297,53 +344,102 @@ def scatter_add(x, idx, m):
@tf.custom_gradient
def _lambda(x):
return scatter_add_real(x, idx, m)
return _lambda(x)
def csrmm_real(gidxA, A_weights, gidxB, B_weights, num_vtypes):
gidxC, C_weights = _csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
0, False, "csr"
)
def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
# Only the last argument is meaningful.
dgidxA, dA_weights = _csrmm(
gidxC, dC_weights, gidxB.reverse(), B_weights, gidxA.number_of_ntypes())
gidxC,
dC_weights,
gidxB.reverse(),
B_weights,
gidxA.number_of_ntypes(),
)
dgidxB, dB_weights = _csrmm(
gidxA.reverse(), A_weights, gidxC, dC_weights, gidxB.number_of_ntypes())
gidxA.reverse(),
A_weights,
gidxC,
dC_weights,
gidxB.number_of_ntypes(),
)
dA_weights = _csrmask(dgidxA, dA_weights, gidxA)
dB_weights = _csrmask(dgidxB, dB_weights, gidxB)
return dA_weights, dB_weights
return (tf.constant(nrows), tf.constant(ncols), C_indptr, C_indices, C_eids, C_weights), grad
return (
tf.constant(nrows),
tf.constant(ncols),
C_indptr,
C_indices,
C_eids,
C_weights,
), grad
def csrmm(gidxA, A_weights, gidxB, B_weights, num_vtypes):
@tf.custom_gradient
def _lambda(A_weights, B_weights):
return csrmm_real(gidxA, A_weights, gidxB, B_weights, num_vtypes)
nrows, ncols, C_indptr, C_indices, C_eids, C_weights = _lambda(A_weights, B_weights)
nrows, ncols, C_indptr, C_indices, C_eids, C_weights = _lambda(
A_weights, B_weights
)
gidxC = create_unitgraph_from_csr(
num_vtypes, nrows.numpy(), ncols.numpy(), C_indptr, C_indices, C_eids,
["coo", "csr", "csc"])
num_vtypes,
nrows.numpy(),
ncols.numpy(),
C_indptr,
C_indices,
C_eids,
["coo", "csr", "csc"],
)
return gidxC, C_weights
def csrsum_real(gidxs, weights):
gidxC, C_weights = _csrsum(gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(0, False, 'csr')
nrows, ncols, C_indptr, C_indices, C_eids = gidxC.adjacency_matrix_tensors(
0, False, "csr"
)
def grad(dnrows, dncols, dC_indptr, dC_indices, dC_eids, dC_weights):
# Only the last argument is meaningful.
return tuple(_csrmask(gidxC, dC_weights, gidx) for gidx in gidxs)
return (tf.constant(nrows), tf.constant(ncols), C_indptr, C_indices, C_eids, C_weights), grad
return (
tf.constant(nrows),
tf.constant(ncols),
C_indptr,
C_indices,
C_eids,
C_weights,
), grad
def csrsum(gidxs, weights):
@tf.custom_gradient
def _lambda(*weights):
return csrsum_real(gidxs, weights)
nrows, ncols, C_indptr, C_indices, C_eids, C_weights = _lambda(*weights)
num_vtypes = gidxs[0].number_of_ntypes()
gidxC = create_unitgraph_from_csr(
num_vtypes, nrows.numpy(), ncols.numpy(), C_indptr, C_indices, C_eids,
["coo", "csr", "csc"])
num_vtypes,
nrows.numpy(),
ncols.numpy(),
C_indptr,
C_indices,
C_eids,
["coo", "csr", "csc"],
)
return gidxC, C_weights
......@@ -352,10 +448,13 @@ def csrmask_real(gidxA, A_weights, gidxB):
def grad(dB_weights):
return _csrmask(gidxB, dB_weights, gidxA)
return B_weights, grad
def csrmask(gidxA, A_weights, gidxB):
@tf.custom_gradient
def _lambda(A_weights):
return csrmask_real(gidxA, A_weights, gidxB)
return _lambda(A_weights)
"""Sparse optimizer is not supported for tensorflow"""
\ No newline at end of file
"""Sparse optimizer is not supported for tensorflow"""
"""Tensorflow backend implementation"""
from __future__ import absolute_import
from distutils.version import LooseVersion
import tensorflow as tf
import builtins
import numbers
from distutils.version import LooseVersion
import numpy as np
import tensorflow as tf
from ... import ndarray as nd
from ..._deprecate import kernel as K
from ...function.base import TargetCode
if LooseVersion(tf.__version__) < LooseVersion("2.3.0"):
raise RuntimeError("DGL requires TensorFlow>=2.3.0 for the official DLPack support.")
raise RuntimeError(
"DGL requires TensorFlow>=2.3.0 for the official DLPack support."
)
def zerocopy_to_dlpack(data):
return tf.experimental.dlpack.to_dlpack(data)
def zerocopy_from_dlpack(dlpack_tensor):
# TODO(Jinjing): Tensorflow requires memory to be 64-bytes aligned. We check the
# alignment and make a copy if needed. The functionality is better in TF's main repo.
......@@ -26,15 +30,17 @@ def zerocopy_from_dlpack(dlpack_tensor):
def data_type_dict():
return {'float16': tf.float16,
'float32': tf.float32,
'float64': tf.float64,
'uint8': tf.uint8,
'int8': tf.int8,
'int16': tf.int16,
'int32': tf.int32,
'int64': tf.int64,
'bool' : tf.bool}
return {
"float16": tf.float16,
"float32": tf.float32,
"float64": tf.float64,
"uint8": tf.uint8,
"int8": tf.int8,
"int16": tf.int16,
"int32": tf.int32,
"int64": tf.int64,
"bool": tf.bool,
}
def cpu():
......@@ -73,18 +79,22 @@ def get_preferred_sparse_format():
def sparse_matrix(data, index, shape, force_format=False):
fmt = index[0]
if fmt != 'coo':
if fmt != "coo":
raise TypeError(
'Tensorflow backend only supports COO format. But got %s.' % fmt)
"Tensorflow backend only supports COO format. But got %s." % fmt
)
# tf.SparseTensor only supports int64 indexing,
# therefore manually casting to int64 when input in int32
spmat = tf.SparseTensor(indices=tf.cast(tf.transpose(
index[1], (1, 0)), tf.int64), values=data, dense_shape=shape)
spmat = tf.SparseTensor(
indices=tf.cast(tf.transpose(index[1], (1, 0)), tf.int64),
values=data,
dense_shape=shape,
)
return spmat, None
def sparse_matrix_indices(spmat):
return ('coo', spmat.indices)
return ("coo", spmat.indices)
def is_tensor(obj):
......@@ -107,6 +117,7 @@ def context(input):
spec = tf.DeviceSpec.from_string(input.device)
return "/{}:{}".format(spec.device_type.lower(), spec.device_index)
def device_type(ctx):
return tf.DeviceSpec.from_string(ctx).device_type.lower()
......@@ -122,7 +133,7 @@ def to_backend_ctx(dglctx):
elif dev_type == 2:
return "/gpu:%d" % (dglctx.device_id)
else:
raise ValueError('Unsupported DGL device context:', dglctx)
raise ValueError("Unsupported DGL device context:", dglctx)
def astype(input, ty):
......@@ -143,17 +154,21 @@ def copy_to(input, ctx, **kwargs):
new_tensor = tf.identity(input)
return new_tensor
def is_pinned(input):
return False # not sure how to do this
return False # not sure how to do this
def sum(input, dim, keepdims=False):
if input.dtype == tf.bool:
input = tf.cast(input, tf.int32)
return tf.reduce_sum(input, axis=dim, keepdims=keepdims)
def floor_div(in1, in2):
return astype(in1 / in2, dtype(in1))
def reduce_sum(input):
if input.dtype == tf.bool:
input = tf.cast(input, tf.int32)
......@@ -192,9 +207,13 @@ def reduce_min(input):
def argsort(input, dim, descending):
if descending:
return tf.cast(tf.argsort(input, axis=dim, direction="DESCENDING"), dtype=tf.int64)
return tf.cast(
tf.argsort(input, axis=dim, direction="DESCENDING"), dtype=tf.int64
)
else:
return tf.cast(tf.argsort(input, axis=dim, direction="ASCENDING"), dtype=tf.int64)
return tf.cast(
tf.argsort(input, axis=dim, direction="ASCENDING"), dtype=tf.int64
)
def topk(input, k, dim, descending=True):
......@@ -248,7 +267,10 @@ def stack(seq, dim):
def split(input, sizes_or_sections, dim):
return [copy_to(_, input.device) for _ in tf.split(input, sizes_or_sections, axis=dim)]
return [
copy_to(_, input.device)
for _ in tf.split(input, sizes_or_sections, axis=dim)
]
def repeat(input, repeats, dim):
......@@ -283,7 +305,9 @@ def scatter_row(data, row_index, value):
# notorious legacy issue that int32 type data is always on CPU, which will
# crash the program since DGL requires feature data to be on the same device
# as graph structure.
return copy_to(tf.tensor_scatter_nd_update(data, row_index, value), data.device)
return copy_to(
tf.tensor_scatter_nd_update(data, row_index, value), data.device
)
def index_add_inplace(data, row_idx, value):
......@@ -357,10 +381,11 @@ def pad_packed_tensor(input, lengths, value, l_min=None):
cum_row = 0
pad_nparray = np.zeros((ndim, 2), dtype=np.int32)
for l in lengths:
t = input[cum_row:cum_row+l]
t = input[cum_row : cum_row + l]
pad_nparray[0, 1] = max_len - l
t = tf.pad(t, tf.constant(pad_nparray),
mode='CONSTANT', constant_values=value)
t = tf.pad(
t, tf.constant(pad_nparray), mode="CONSTANT", constant_values=value
)
tensor_list.append(t)
cum_row += l
return tf.stack(tensor_list, axis=0)
......@@ -384,26 +409,35 @@ def equal(x, y):
def allclose(x, y, rtol=1e-4, atol=1e-4):
return np.allclose(tf.convert_to_tensor(x).numpy(),
tf.convert_to_tensor(y).numpy(), rtol=rtol, atol=atol)
return np.allclose(
tf.convert_to_tensor(x).numpy(),
tf.convert_to_tensor(y).numpy(),
rtol=rtol,
atol=atol,
)
def logical_not(input):
return ~input
def logical_and(input1, input2):
return tf.math.logical_and(input1, input2)
def clone(input):
# TF tensor is always immutable so returning the input is safe.
return input
def clamp(data, min_val, max_val):
return tf.clip_by_value(data, min_val, max_val)
def replace_inf_with_zero(x):
return tf.where(tf.abs(x) == np.inf, 0, x)
def count_nonzero(input):
return int(tf.math.count_nonzero(input))
......@@ -429,7 +463,7 @@ def full_1d(length, fill_value, dtype, ctx):
def nonzero_1d(input):
nonzero_bool = tf.cast(input, tf.bool)
return tf.reshape(tf.where(nonzero_bool), (-1, ))
return tf.reshape(tf.where(nonzero_bool), (-1,))
def sort_1d(input):
......@@ -461,7 +495,7 @@ def zerocopy_from_numpy(np_array):
def zerocopy_to_dgl_ndarray(data):
if device_type(data.device) == 'gpu' and data.dtype in (tf.int32, tf.int64):
if device_type(data.device) == "gpu" and data.dtype in (tf.int32, tf.int64):
# NOTE: TF doesn't keep signed tensors on GPU due to legacy issues with
# shape inference. Convert it to unsigned and cast it back afterwards.
if data.dtype == tf.int32:
......@@ -481,35 +515,78 @@ def zerocopy_from_dgl_ndarray(input):
return zerocopy_from_dlpack(input.to_dlpack())
def binary_reduce(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
out_size, lhs_map=(None, None), rhs_map=(None, None), out_map=(None, None)):
def binary_reduce(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map=(None, None),
rhs_map=(None, None),
out_map=(None, None),
):
@tf.custom_gradient
def _lambda(lhs_data, rhs_data):
return binary_reduce_real(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
out_size, lhs_map, rhs_map, out_map)
return binary_reduce_real(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map,
rhs_map,
out_map,
)
return _lambda(lhs_data, rhs_data)
def binary_reduce_real(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
out_size, lhs_map, rhs_map, out_map):
def binary_reduce_real(
reducer,
binary_op,
graph,
lhs,
rhs,
lhs_data,
rhs_data,
out_size,
lhs_map,
rhs_map,
out_map,
):
with tf.device(lhs_data.device):
lhs_data_nd = zerocopy_to_dgl_ndarray(lhs_data)
rhs_data_nd = zerocopy_to_dgl_ndarray(rhs_data)
feat_shape = K.infer_binary_feature_shape(
binary_op, lhs_data_nd, rhs_data_nd)
binary_op, lhs_data_nd, rhs_data_nd
)
out_shape = feat_shape
if binary_op == 'dot':
if binary_op == "dot":
out_shape = feat_shape[:-1]
out_data = tf.zeros((out_size,) + out_shape, dtype=lhs_data.dtype)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.binary_op_reduce(
reducer if reducer != 'mean' else 'sum',
binary_op, graph, lhs, rhs, lhs_data_nd, rhs_data_nd,
out_data_nd, lhs_map[0], rhs_map[0], out_map[0])
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
lhs_map[0],
rhs_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have better solution in the future.
if reducer == 'mean':
if reducer == "mean":
degs = tf.zeros((out_data.shape[0],), dtype=lhs_data.dtype)
degs_nd = zerocopy_to_dgl_ndarray(degs)
if lhs != TargetCode.DST: # src or edge
......@@ -523,12 +600,15 @@ def binary_reduce_real(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
in_ones = tf.ones((n,), dtype=lhs_data.dtype)
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
K.copy_reduce(
'sum', graph, target, in_ones_nd, degs_nd, in_map, out_map[0])
"sum", graph, target, in_ones_nd, degs_nd, in_map, out_map[0]
)
# reshape
degs = tf.reshape(degs,
(out_data.shape[0],) + (1,) * (out_data.ndim - 1))
degs = tf.clip_by_value(degs, clip_value_min=1,
clip_value_max=np.inf) # ???
degs = tf.reshape(
degs, (out_data.shape[0],) + (1,) * (out_data.ndim - 1)
)
degs = tf.clip_by_value(
degs, clip_value_min=1, clip_value_max=np.inf
) # ???
out_data = out_data / degs
else:
degs = None
......@@ -537,80 +617,129 @@ def binary_reduce_real(reducer, binary_op, graph, lhs, rhs, lhs_data, rhs_data,
with tf.device(grad_out.device):
grad_lhs = None
grad_rhs = None
if reducer == 'mean':
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
# compute gradient for lhs
grad_lhs = tf.zeros((lhs_data_nd.shape[0],) + feat_shape)
K.backward_lhs_binary_op_reduce(
reducer if reducer != 'mean' else 'sum',
binary_op, graph, lhs, rhs, lhs_data_nd, rhs_data_nd,
out_data_nd, grad_out_nd, zerocopy_to_dgl_ndarray(grad_lhs),
lhs_map[1], rhs_map[1], out_map[1])
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_lhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_lhs = _reduce_grad(grad_lhs, lhs_data_nd.shape)
# compute gradient for rhs
grad_rhs = tf.zeros((rhs_data_nd.shape[0],) + feat_shape)
K.backward_rhs_binary_op_reduce(
reducer if reducer != 'mean' else 'sum',
binary_op, graph, lhs, rhs, lhs_data_nd, rhs_data_nd,
out_data_nd, grad_out_nd, zerocopy_to_dgl_ndarray(grad_rhs),
lhs_map[1], rhs_map[1], out_map[1])
reducer if reducer != "mean" else "sum",
binary_op,
graph,
lhs,
rhs,
lhs_data_nd,
rhs_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_rhs),
lhs_map[1],
rhs_map[1],
out_map[1],
)
grad_rhs = _reduce_grad(grad_rhs, rhs_data_nd.shape)
return grad_lhs, grad_rhs
return out_data, grad
def copy_reduce(reducer, graph, target, in_data, out_size, in_map=(None, None),
out_map=(None, None)):
def copy_reduce(
reducer,
graph,
target,
in_data,
out_size,
in_map=(None, None),
out_map=(None, None),
):
@tf.custom_gradient
def _lambda(in_data):
return copy_reduce_real(reducer, graph, target, in_data, out_size, in_map,
out_map)
return copy_reduce_real(
reducer, graph, target, in_data, out_size, in_map, out_map
)
return _lambda(in_data)
def copy_reduce_real(reducer, graph, target, in_data, out_size, in_map,
out_map):
def copy_reduce_real(
reducer, graph, target, in_data, out_size, in_map, out_map
):
with tf.device(in_data.device):
out_data = tf.zeros(
(out_size,) + tuple(in_data.shape[1:]), dtype=in_data.dtype)
(out_size,) + tuple(in_data.shape[1:]), dtype=in_data.dtype
)
in_data_nd = zerocopy_to_dgl_ndarray(in_data)
out_data_nd = zerocopy_to_dgl_ndarray(out_data)
K.copy_reduce(
reducer if reducer != 'mean' else 'sum',
graph, target, in_data_nd, out_data_nd, in_map[0], out_map[0])
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
in_map[0],
out_map[0],
)
# normalize if mean reducer
# NOTE(zihao): this is a temporary hack and we should have better solution in the future.
if reducer == 'mean':
if reducer == "mean":
in_ones = tf.ones(in_data.shape[0], dtype=in_data.dtype)
degs = tf.zeros(out_data.shape[0], dtype=in_data.dtype)
in_ones_nd = zerocopy_to_dgl_ndarray(in_ones)
degs_nd = zerocopy_to_dgl_ndarray(degs)
K.copy_reduce(
'sum', graph, target, in_ones_nd, degs_nd, in_map[0], out_map[0])
"sum", graph, target, in_ones_nd, degs_nd, in_map[0], out_map[0]
)
# reshape
degs = tf.reshape(degs,
(out_data.shape[0],) + (1,) * (out_data.ndim - 1))
degs = tf.clip_by_value(degs, clip_value_min=1,
clip_value_max=np.inf) # TODO: ???
degs = tf.reshape(
degs, (out_data.shape[0],) + (1,) * (out_data.ndim - 1)
)
degs = tf.clip_by_value(
degs, clip_value_min=1, clip_value_max=np.inf
) # TODO: ???
out_data = out_data / degs
else:
degs = None
def grad(grad_out):
with tf.device(grad_out.device):
if reducer == 'mean':
if reducer == "mean":
grad_out = grad_out / degs
grad_out_nd = zerocopy_to_dgl_ndarray(grad_out)
grad_in = tf.zeros(in_data_nd.shape)
K.backward_copy_reduce(
reducer if reducer != 'mean' else 'sum',
graph, target, in_data_nd, out_data_nd, grad_out_nd,
zerocopy_to_dgl_ndarray(grad_in), in_map[1], out_map[1])
reducer if reducer != "mean" else "sum",
graph,
target,
in_data_nd,
out_data_nd,
grad_out_nd,
zerocopy_to_dgl_ndarray(grad_in),
in_map[1],
out_map[1],
)
return grad_in
return out_data, grad
......@@ -640,10 +769,11 @@ def _reduce_grad(grad, shape):
num_to_squeeze = len(grad_shape) - len(in_shape)
# pad inshape
in_shape = (1,) * num_to_squeeze + in_shape
reduce_idx = np.asarray(np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape)))
reduce_idx = np.asarray(
np.nonzero(np.asarray(grad_shape) - np.asarray(in_shape))
)
reduce_idx += 1 # skip batch dim
reduce_idx_tensor = tf.constant(tuple(
reduce_idx.flatten().tolist()))
reduce_idx_tensor = tf.constant(tuple(reduce_idx.flatten().tolist()))
grad = tf.reduce_sum(grad, axis=reduce_idx_tensor, keepdims=True)
return tf.reshape(grad, shape)
......@@ -741,6 +871,7 @@ def is_no_grad(x):
def is_recording():
raise NotImplementedError("Tensorflow doesn't support is_recording")
no_grad = None
initialize_context()
......@@ -11,39 +11,48 @@ ALL = "__ALL__"
# An alias for [:]
SLICE_FULL = slice(None, None, None)
# Reserved column names for storing parent node/edge types and IDs in flattened heterographs
NTYPE = '_TYPE'
NID = '_ID'
ETYPE = '_TYPE'
EID = '_ID'
NTYPE = "_TYPE"
NID = "_ID"
ETYPE = "_TYPE"
EID = "_ID"
_INTERNAL_COLUMNS = {NTYPE, NID, ETYPE, EID}
def is_internal_column(name):
"""Return true if the column name is reversed by DGL."""
return name in _INTERNAL_COLUMNS
def is_all(arg):
"""Return true if the argument is a special symbol for all nodes or edges."""
return isinstance(arg, str) and arg == ALL
# pylint: disable=invalid-name
_default_formatwarning = warnings.formatwarning
class DGLWarning(UserWarning):
"""DGL Warning class."""
# pylint: disable=unused-argument
def dgl_warning_format(message, category, filename, lineno, line=None):
"""Format DGL warnings."""
if isinstance(category, DGLWarning):
return "DGL Warning: {}\n".format(message)
else:
return _default_formatwarning(message, category, filename, lineno, line=None)
return _default_formatwarning(
message, category, filename, lineno, line=None
)
def dgl_warning(message, category=DGLWarning, stacklevel=2):
"""DGL warning wrapper that defaults to ``DGLWarning`` instead of ``UserWarning`` category."""
return warnings.warn(message, category=category, stacklevel=stacklevel)
warnings.formatwarning = dgl_warning_format
_init_internal_api()
......@@ -2,9 +2,10 @@
reference: tvm/python/tvm/collections.py
"""
from __future__ import absolute_import as _abs
from . import _api_internal
from ._ffi.object import ObjectBase, register_object
from ._ffi.object_generic import convert_to_object
from . import _api_internal
@register_object
......@@ -29,8 +30,11 @@ class List(ObjectBase):
return [self[idx] for idx in range(start, stop, step)]
if i < -len(self) or i >= len(self):
raise IndexError("List index out of range. List size: {}, got index {}"
.format(len(self), i))
raise IndexError(
"List index out of range. List size: {}, got index {}".format(
len(self), i
)
)
if i < 0:
i += len(self)
ret = _api_internal._ListGetItem(self, i)
......@@ -60,7 +64,7 @@ class Map(ObjectBase):
def items(self):
"""Get the items from the map"""
akvs = _api_internal._MapItems(self)
return [(akvs[i], akvs[i+1]) for i in range(0, len(akvs), 2)]
return [(akvs[i], akvs[i + 1]) for i in range(0, len(akvs), 2)]
def __len__(self):
return _api_internal._MapSize(self)
......@@ -76,12 +80,13 @@ class StrMap(Map):
def items(self):
"""Get the items from the map"""
akvs = _api_internal._MapItems(self)
return [(akvs[i], akvs[i+1]) for i in range(0, len(akvs), 2)]
return [(akvs[i], akvs[i + 1]) for i in range(0, len(akvs), 2)]
@register_object
class Value(ObjectBase):
"""Object wrapper for various values."""
@property
def data(self):
"""Return the value data."""
......
......@@ -2,17 +2,19 @@
# pylint: disable=not-callable
import numpy as np
from .base import DGLError, is_all, NID, EID, ALL, dgl_warning
from . import backend as F
from . import function as fn
from .frame import Frame
from .udf import NodeBatch, EdgeBatch
from . import ops
from .base import ALL, EID, NID, DGLError, dgl_warning, is_all
from .frame import Frame
from .udf import EdgeBatch, NodeBatch
def is_builtin(func):
"""Return true if the function is a DGL builtin function."""
return isinstance(func, fn.BuiltinFunction)
def invoke_node_udf(graph, nid, ntype, func, *, ndata=None, orig_nid=None):
"""Invoke user-defined node function on the given nodes.
......@@ -43,9 +45,12 @@ def invoke_node_udf(graph, nid, ntype, func, *, ndata=None, orig_nid=None):
nid = graph.nodes(ntype=ntype)
else:
ndata = graph._node_frames[ntid].subframe(nid)
nbatch = NodeBatch(graph, nid if orig_nid is None else orig_nid, ntype, ndata)
nbatch = NodeBatch(
graph, nid if orig_nid is None else orig_nid, ntype, ndata
)
return func(nbatch)
def invoke_edge_udf(graph, eid, etype, func, *, orig_eid=None):
"""Invoke user-defined edge function on the given edges.
......@@ -70,20 +75,29 @@ def invoke_edge_udf(graph, eid, etype, func, *, orig_eid=None):
etid = graph.get_etype_id(etype)
stid, dtid = graph._graph.metagraph.find_edge(etid)
if is_all(eid):
u, v, eid = graph.edges(form='all')
u, v, eid = graph.edges(form="all")
edata = graph._edge_frames[etid]
else:
u, v = graph.find_edges(eid)
edata = graph._edge_frames[etid].subframe(eid)
if len(u) == 0:
dgl_warning('The input graph for the user-defined edge function ' \
'does not contain valid edges')
dgl_warning(
"The input graph for the user-defined edge function "
"does not contain valid edges"
)
srcdata = graph._node_frames[stid].subframe(u)
dstdata = graph._node_frames[dtid].subframe(v)
ebatch = EdgeBatch(graph, eid if orig_eid is None else orig_eid,
etype, srcdata, edata, dstdata)
ebatch = EdgeBatch(
graph,
eid if orig_eid is None else orig_eid,
etype,
srcdata,
edata,
dstdata,
)
return func(ebatch)
def invoke_udf_reduce(graph, func, msgdata, *, orig_nid=None):
"""Invoke user-defined reduce function on all the nodes in the graph.
......@@ -119,7 +133,9 @@ def invoke_udf_reduce(graph, func, msgdata, *, orig_nid=None):
unique_degs, bucketor = _bucketing(degs)
bkt_rsts = []
bkt_nodes = []
for deg, node_bkt, orig_nid_bkt in zip(unique_degs, bucketor(nodes), bucketor(orig_nid)):
for deg, node_bkt, orig_nid_bkt in zip(
unique_degs, bucketor(nodes), bucketor(orig_nid)
):
if deg == 0:
# skip reduce function for zero-degree nodes
continue
......@@ -127,7 +143,7 @@ def invoke_udf_reduce(graph, func, msgdata, *, orig_nid=None):
ndata_bkt = dstdata.subframe(node_bkt)
# order the incoming edges per node by edge ID
eid_bkt = F.zerocopy_to_numpy(graph.in_edges(node_bkt, form='eid'))
eid_bkt = F.zerocopy_to_numpy(graph.in_edges(node_bkt, form="eid"))
assert len(eid_bkt) == deg * len(node_bkt)
eid_bkt = np.sort(eid_bkt.reshape((len(node_bkt), deg)), 1)
eid_bkt = F.zerocopy_from_numpy(eid_bkt.flatten())
......@@ -148,7 +164,9 @@ def invoke_udf_reduce(graph, func, msgdata, *, orig_nid=None):
retf._default_initializer = dstdata._default_initializer
# merge bucket results and write to the result frame
if len(bkt_rsts) != 0: # if all the nodes have zero degree, no need to merge results.
if (
len(bkt_rsts) != 0
): # if all the nodes have zero degree, no need to merge results.
merged_rst = {}
for k in bkt_rsts[0].keys():
merged_rst[k] = F.cat([rst[k] for rst in bkt_rsts], dim=0)
......@@ -157,6 +175,7 @@ def invoke_udf_reduce(graph, func, msgdata, *, orig_nid=None):
return retf
def _bucketing(val):
"""Internal function to create groups on the values.
......@@ -179,11 +198,14 @@ def _bucketing(val):
for v in unique_val:
eqidx = F.nonzero_1d(F.equal(sorted_val, v))
bkt_idx.append(F.gather_row(idx, eqidx))
def bucketor(data):
bkts = [F.gather_row(data, idx) for idx in bkt_idx]
return bkts
return unique_val, bucketor
def data_dict_to_list(graph, data_dict, func, target):
"""Get node or edge feature data of the given name for all the types.
......@@ -206,23 +228,23 @@ def data_dict_to_list(graph, data_dict, func, target):
data of type ``types[i]``.
"""
if isinstance(func, fn.BinaryMessageFunction):
if target in ['u', 'v']:
if target in ["u", "v"]:
output_list = [None] * graph._graph.number_of_ntypes()
for srctype, _, dsttype in graph.canonical_etypes:
if target == 'u':
if target == "u":
src_id = graph.get_ntype_id(srctype)
output_list[src_id] = data_dict[srctype]
else:
dst_id = graph.get_ntype_id(dsttype)
output_list[dst_id] = data_dict[dsttype]
else: # target == 'e'
else: # target == 'e'
output_list = [None] * graph._graph.number_of_etypes()
for rel in graph.canonical_etypes:
etid = graph.get_etype_id(rel)
output_list[etid] = data_dict[rel]
return output_list
else:
if target == 'u':
if target == "u":
lhs_list = [None] * graph._graph.number_of_ntypes()
if not isinstance(data_dict, dict):
src_id, _ = graph._graph.metagraph.find_edge(0)
......@@ -232,13 +254,14 @@ def data_dict_to_list(graph, data_dict, func, target):
src_id = graph.get_ntype_id(srctype)
lhs_list[src_id] = data_dict[srctype]
return lhs_list
else: # target == 'e':
else: # target == 'e':
rhs_list = [None] * graph._graph.number_of_etypes()
for rel in graph.canonical_etypes:
etid = graph.get_etype_id(rel)
rhs_list[etid] = data_dict[rel]
return rhs_list
def invoke_gsddmm(graph, func):
"""Invoke g-SDDMM computation on the graph.
......@@ -270,13 +293,16 @@ def invoke_gsddmm(graph, func):
if graph._graph.number_of_etypes() > 1:
# Convert to list as dict is unordered.
if func.name == "copy_u":
x = data_dict_to_list(graph, x, func, 'u')
else: # "copy_e"
x = data_dict_to_list(graph, x, func, 'e')
x = data_dict_to_list(graph, x, func, "u")
else: # "copy_e"
x = data_dict_to_list(graph, x, func, "e")
z = op(graph, x)
return {func.out_field : z}
return {func.out_field: z}
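# Editor's note: illustrative usage (not part of this commit) of the g-SDDMM path above.
# Built-in binary message functions applied with apply_edges() are handled by the
# invoke_gsddmm() branch and lowered to a fused per-edge kernel. A minimal sketch,
# assuming the PyTorch backend:
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata["h"] = torch.randn(3, 4)
g.apply_edges(fn.u_dot_v("h", "h", "score"))  # per-edge dot(src.h, dst.h)
print(g.edata["score"].shape)                 # (num_edges, 1)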
def invoke_gspmm(graph, mfunc, rfunc, *, srcdata=None, dstdata=None, edata=None):
def invoke_gspmm(
graph, mfunc, rfunc, *, srcdata=None, dstdata=None, edata=None
):
"""Invoke g-SPMM computation on the graph.
Parameters
......@@ -301,9 +327,11 @@ def invoke_gspmm(graph, mfunc, rfunc, *, srcdata=None, dstdata=None, edata=None)
"""
# sanity check
if mfunc.out_field != rfunc.msg_field:
raise DGLError('Invalid message ({}) and reduce ({}) function pairs.'
' The output field of the message function must be equal to the'
' message field of the reduce function.'.format(mfunc, rfunc))
raise DGLError(
"Invalid message ({}) and reduce ({}) function pairs."
" The output field of the message function must be equal to the"
" message field of the reduce function.".format(mfunc, rfunc)
)
if edata is None:
edata = graph.edata
if srcdata is None:
......@@ -315,7 +343,7 @@ def invoke_gspmm(graph, mfunc, rfunc, *, srcdata=None, dstdata=None, edata=None)
if isinstance(mfunc, fn.BinaryMessageFunction):
x = alldata[mfunc.lhs][mfunc.lhs_field]
y = alldata[mfunc.rhs][mfunc.rhs_field]
op = getattr(ops, '{}_{}'.format(mfunc.name, rfunc.name))
op = getattr(ops, "{}_{}".format(mfunc.name, rfunc.name))
if graph._graph.number_of_etypes() > 1:
lhs_target, _, rhs_target = mfunc.name.split("_", 2)
x = data_dict_to_list(graph, x, mfunc, lhs_target)
......@@ -323,14 +351,15 @@ def invoke_gspmm(graph, mfunc, rfunc, *, srcdata=None, dstdata=None, edata=None)
z = op(graph, x, y)
else:
x = alldata[mfunc.target][mfunc.in_field]
op = getattr(ops, '{}_{}'.format(mfunc.name, rfunc.name))
op = getattr(ops, "{}_{}".format(mfunc.name, rfunc.name))
if graph._graph.number_of_etypes() > 1 and not isinstance(x, tuple):
if mfunc.name == "copy_u":
x = data_dict_to_list(graph, x, mfunc, 'u')
else: # "copy_e"
x = data_dict_to_list(graph, x, mfunc, 'e')
x = data_dict_to_list(graph, x, mfunc, "u")
else: # "copy_e"
x = data_dict_to_list(graph, x, mfunc, "e")
z = op(graph, x)
return {rfunc.out_field : z}
return {rfunc.out_field: z}
def message_passing(g, mfunc, rfunc, afunc):
"""Invoke message passing computation on the whole graph.
......@@ -351,8 +380,12 @@ def message_passing(g, mfunc, rfunc, afunc):
dict[str, Tensor]
Results from the message passing computation.
"""
if (is_builtin(mfunc) and is_builtin(rfunc) and
getattr(ops, '{}_{}'.format(mfunc.name, rfunc.name), None) is not None):
if (
is_builtin(mfunc)
and is_builtin(rfunc)
and getattr(ops, "{}_{}".format(mfunc.name, rfunc.name), None)
is not None
):
# invoke fused message passing
ndata = invoke_gspmm(g, mfunc, rfunc)
else:
......@@ -362,7 +395,9 @@ def message_passing(g, mfunc, rfunc, afunc):
msgdata = invoke_gsddmm(g, mfunc)
else:
orig_eid = g.edata.get(EID, None)
msgdata = invoke_edge_udf(g, ALL, g.canonical_etypes[0], mfunc, orig_eid=orig_eid)
msgdata = invoke_edge_udf(
g, ALL, g.canonical_etypes[0], mfunc, orig_eid=orig_eid
)
# reduce phase
if is_builtin(rfunc):
msg = rfunc.msg_field
......@@ -372,9 +407,11 @@ def message_passing(g, mfunc, rfunc, afunc):
ndata = invoke_udf_reduce(g, rfunc, msgdata, orig_nid=orig_nid)
# apply phase
if afunc is not None:
for k, v in g.dstdata.items(): # include original node features
for k, v in g.dstdata.items(): # include original node features
if k not in ndata:
ndata[k] = v
orig_nid = g.dstdata.get(NID, None)
ndata = invoke_node_udf(g, ALL, g.dsttypes[0], afunc, ndata=ndata, orig_nid=orig_nid)
ndata = invoke_node_udf(
g, ALL, g.dsttypes[0], afunc, ndata=ndata, orig_nid=orig_nid
)
return ndata
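# Editor's note: illustrative usage (not part of this commit) of the two code paths in
# message_passing() above. When both the message and reduce functions are DGL builtins
# with a fused `ops` kernel, the invoke_gspmm() branch is taken; otherwise DGL falls back
# to invoke_gsddmm()/invoke_edge_udf() plus invoke_udf_reduce(). A minimal sketch,
# assuming the PyTorch backend:
import dgl
import dgl.function as fn
import torch

g = dgl.graph(([0, 1, 2], [1, 2, 0]))
g.ndata["h"] = torch.ones(3, 4)

# Built-in pair -> fused g-SpMM kernel.
g.update_all(fn.copy_u("h", "m"), fn.sum("m", "h_sum"))

# Equivalent UDF pair -> generic edge UDF + degree-bucketed reduce.
g.update_all(
    lambda edges: {"m": edges.src["h"]},
    lambda nodes: {"h_udf": nodes.mailbox["m"].sum(dim=1)},
)
assert torch.allclose(g.ndata["h_sum"], g.ndata["h_udf"])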
......@@ -3,27 +3,25 @@
from .. import backend as F
from .._ffi.function import _init_api
_COMM_MODES_MAP = {
'remainder': 0
}
_COMM_MODES_MAP = {"remainder": 0}
class UniqueId(object):
""" Class for allowing python code to create and communicate NCCL Unique
IDs, needed for creating communicators.
"""Class for allowing python code to create and communicate NCCL Unique
IDs, needed for creating communicators.
"""
def __init__(self, id_str=None):
""" Create an object reference the current NCCL unique id.
"""
"""Create an object reference the current NCCL unique id."""
if id_str:
if isinstance(id_str, bytes):
id_str = id_str.decode('utf-8')
id_str = id_str.decode("utf-8")
self._handle = _CAPI_DGLNCCLUniqueIdFromString(id_str)
else:
self._handle = _CAPI_DGLNCCLGetUniqueId()
def get(self):
""" Get the C-handle for this object.
"""
"""Get the C-handle for this object."""
return self._handle
def __str__(self):
......@@ -37,187 +35,196 @@ class UniqueId(object):
class Communicator(object):
""" High-level wrapper for NCCL communication.
"""
"""High-level wrapper for NCCL communication."""
def __init__(self, size, rank, unique_id):
""" Create a new NCCL communicator.
"""Create a new NCCL communicator.
Parameters
----------
size : int
The number of processes in the communicator.
rank : int
The rank of the current process in the communicator.
unique_id : NCCLUniqueId
The unique id of the root process (rank=0).
Parameters
----------
size : int
The number of processes in the communicator.
rank : int
The rank of the current process in the communicator.
unique_id : NCCLUniqueId
The unique id of the root process (rank=0).
Examples
--------
Examples
--------
>>> from dgl.cuda.nccl import Communicator, UniqueId
>>> from dgl.cuda.nccl import Communicator, UniqueId
The root process will generate a unique NCCL id and communicate it
to the other processes.
The root process will generate a unique NCCL id and communicate it
to the other processes.
>>> uid = UniqueId()
>>> store.set('nccl_root_id', str(uid))
>>> uid = UniqueId()
>>> store.set('nccl_root_id', str(uid))
And all other processes create unique ids from the root processes.
And all other processes create unique ids from the root processes.
>>> uid = UniqueId(store.get('nccl_root_id'))
>>> uid = UniqueId(store.get('nccl_root_id'))
Then, all processes should create the communicator.
Then, all processes should create the communicator.
>>> comm = Communicator(world_size, rank, uid)
>>> comm = Communicator(world_size, rank, uid)
"""
assert rank < size, "The rank of a process must be less than the " \
assert rank < size, (
"The rank of a process must be less than the "
"size of the communicator."
)
self._handle = _CAPI_DGLNCCLCreateComm(size, rank, unique_id.get())
self._rank = rank
self._size = size
def sparse_all_to_all_push(self, idx, value, partition):
""" Perform an all-to-all-v operation, where by all processors send out
a set of indices and corresponding values. Indices and values,
corresponding to the current process, will copied into the output
arrays.
Parameters
----------
idx : tensor
The 1D set of indices to send to other processors.
value : tensor
The multi-dimension set of values to send to other processors.
The first dimension must match that of `idx`.
partition : NDArrayPartition
The object containing information for assigning indices to
processors.
Returns
-------
tensor
The 1D tensor of the received indices.
tensor
The set of received values.
Examples
--------
To perform a sparse_all_to_all_push(), a partition object must be
provided. A partition of a homogeneous graph, where the vertices are
striped across processes, can be generated via:
>>> from dgl.partition import NDArrayPartition
>>> part = NDArrayPartition(g.num_nodes(), comm.size(), mode='remainder' )
With this partition, each processor can send values to be associated
with vertices in the graph. So if we have an array `global_idxs` of all of
the neighbors updated during mini-batch processing, and an array
`global_values` containing the new values associated with the neighbors,
we communicate them to the owning processes via:
>>> my_idxs, my_values = comm.sparse_all_to_all_push(global_idxs, global_values, part)
This communication pattern is common when communicating gradient
updates for node embeddings.
Indices that the current process owns do not need to be treated specially,
as internally they will be copied to the output array. For example, if
process 0 has the set of indices '[0, 3, 8, 9, 10]' and process 1 has
'[0, 2, 4, 5, 8, 8, 9]', using a remainder partition will result in
indices '[0, 8, 10, 0, 2, 4, 8, 8]' for process 0 and '[3, 9, 5, 9]'
for process 1.
"""Perform an all-to-all-v operation, where by all processors send out
a set of indices and corresponding values. Indices and values,
corresponding to the current process, will copied into the output
arrays.
Parameters
----------
idx : tensor
The 1D set of indices to send to other processors.
value : tensor
The multi-dimension set of values to send to other processors.
The first dimension must match that of `idx`.
partition : NDArrayPartition
The object containing information for assigning indices to
processors.
Returns
-------
tensor
The 1D tensor of the received indices.
tensor
The set of received values.
Examples
--------
To perform a sparse_all_to_all_push(), a partition object must be
provided. A partition of a homogeneous graph, where the vertices are
striped across processes, can be generated via:
>>> from dgl.partition import NDArrayPartition
>>> part = NDArrayPartition(g.num_nodes(), comm.size(), mode='remainder' )
With this partition, each processor can send values to be associated
with vertices in the graph. So if we have an array `global_idxs` of all of
the neighbors updated during mini-batch processing, and an array
`global_values` containing the new values associated with the neighbors,
we communicate them to the owning processes via:
>>> my_idxs, my_values = comm.sparse_all_to_all_push(global_idxs, global_values, part)
This communication pattern is common when communicating gradient
updates for node embeddings.
Indices that the current process owns do not need to be treated specially,
as internally they will be copied to the output array. For example, if
process 0 has the set of indices '[0, 3, 8, 9, 10]' and process 1 has
'[0, 2, 4, 5, 8, 8, 9]', using a remainder partition will result in
indices '[0, 8, 10, 0, 2, 4, 8, 8]' for process 0 and '[3, 9, 5, 9]'
for process 1.
"""
out_idx, out_value = _CAPI_DGLNCCLSparseAllToAllPush(
self.get(), F.zerocopy_to_dgl_ndarray(idx),
self.get(),
F.zerocopy_to_dgl_ndarray(idx),
F.zerocopy_to_dgl_ndarray(value),
partition.get())
return (F.zerocopy_from_dgl_ndarray(out_idx),
F.zerocopy_from_dgl_ndarray(out_value))
partition.get(),
)
return (
F.zerocopy_from_dgl_ndarray(out_idx),
F.zerocopy_from_dgl_ndarray(out_value),
)
def sparse_all_to_all_pull(self, req_idx, value, partition):
""" Perform an all-to-all-v operation, where by all processors request
the values corresponding to their set of indices.
Parameters
----------
req_idx : IdArray
The set of indices this processor is requesting.
value : NDArray
The multi-dimension set of values that can be requested from
this processor.
partition : NDArrayPartition
The object containing information for assigning indices to
processors.
Returns
-------
tensor
The set of received values, corresponding to `req_idx`.
Examples
--------
To perform a sparse_all_to_all_pull(), a partition object must be
provided. A partition of a homogeneous graph, where the vertices are
striped across processes, can be generated via:
>>> from dgl.partition import NDArrayPartition
>>> part = NDArrayPartition(g.num_nodes(), comm.size(), mode='remainder' )
With this partition, each processor can request values/features
associated with vertices in the graph. So in the case where we have
a set of neighbors 'nbr_idxs' we need features for, and each process
has a tensor 'node_feat' storing the features of nodes it owns in
the partition, the features can be requested via:
>>> nbr_values = comm.sparse_all_to_all_pull(nbr_idxs, node_feat, part)
Then the two arrays 'nbr_idxs' and 'nbr_values' form the sparse
set of features, where 'nbr_idxs[i]' is the global node id, and
'nbr_values[i]' is the feature vector for that node. This
communication pattern is useful for node features or node
embeddings.
"""Perform an all-to-all-v operation, where by all processors request
the values corresponding to their set of indices.
Parameters
----------
req_idx : IdArray
The set of indices this processor is requesting.
value : NDArray
The multi-dimension set of values that can be requested from
this processor.
partition : NDArrayPartition
The object containing information for assigning indices to
processors.
Returns
-------
tensor
The set of received values, corresponding to `req_idx`.
Examples
--------
To perform a sparse_all_to_all_pull(), a partition object must be
provided. A partition of a homogeneous graph, where the vertices are
striped across processes, can be generated via:
>>> from dgl.partition import NDArrayPartition
>>> part = NDArrayPartition(g.num_nodes(), comm.size(), mode='remainder' )
With this partition, each processor can request values/features
associated with vertices in the graph. So in the case where we have
a set of neighbors 'nbr_idxs' we need features for, and each process
has a tensor 'node_feat' storing the features of nodes it owns in
the partition, the features can be requested via:
>>> nbr_values = comm.sparse_all_to_all_pull(nbr_idxs, node_feat, part)
Then the two arrays 'nbr_idxs' and 'nbr_values' form the sparse
set of features, where 'nbr_idxs[i]' is the global node id, and
'nbr_values[i]' is the feature vector for that node. This
communication pattern is useful for node features or node
embeddings.
"""
out_value = _CAPI_DGLNCCLSparseAllToAllPull(
self.get(), F.zerocopy_to_dgl_ndarray(req_idx),
self.get(),
F.zerocopy_to_dgl_ndarray(req_idx),
F.zerocopy_to_dgl_ndarray(value),
partition.get())
partition.get(),
)
return F.zerocopy_from_dgl_ndarray(out_value)
def get(self):
""" Get the C-Handle for this object.
"""
"""Get the C-Handle for this object."""
return self._handle
def rank(self):
""" Get the rank of this process in this communicator.
"""Get the rank of this process in this communicator.
Returns
-------
int
The rank of this process.
Returns
-------
int
The rank of this process.
"""
return self._rank
def size(self):
""" Get the size of this communicator.
"""Get the size of this communicator.
Returns
-------
int
The number of processes in this communicator.
Returns
-------
int
The number of processes in this communicator.
"""
return self._size
def is_supported():
""" Check if DGL was built with NCCL support.
"""Check if DGL was built with NCCL support.
Returns
-------
bool
True if NCCL support was built in.
Returns
-------
bool
True if NCCL support was built in.
"""
return _CAPI_DGLNCCLHasSupport()
_init_api("dgl.cuda.nccl")
......@@ -5,55 +5,74 @@ for downloading, processing, saving and loading data from external resources.
from __future__ import absolute_import
from . import citation_graph as citegrh
from .citation_graph import CoraBinary, CitationGraphDataset
from .minigc import *
from .tree import SST, SSTDataset
from .utils import *
from .sbm import SBMMixture, SBMMixtureDataset
from .reddit import RedditDataset
from .ppi import PPIDataset, LegacyPPIDataset
from .tu import TUDataset, LegacyTUDataset
from .gnn_benchmark import AmazonCoBuy, CoraFull, Coauthor, AmazonCoBuyComputerDataset, \
AmazonCoBuyPhotoDataset, CoauthorPhysicsDataset, CoauthorCSDataset, CoraFullDataset
from .karate import KarateClub, KarateClubDataset
from .gindt import GINDataset
from .adapter import *
from .bitcoinotc import BitcoinOTC, BitcoinOTCDataset
from .citation_graph import (
CitationGraphDataset,
CiteseerGraphDataset,
CoraBinary,
CoraGraphDataset,
PubmedGraphDataset,
)
from .csv_dataset import CSVDataset
from .dgl_dataset import DGLBuiltinDataset, DGLDataset
from .fakenews import FakeNewsDataset
from .flickr import FlickrDataset
from .fraud import FraudAmazonDataset, FraudDataset, FraudYelpDataset
from .gdelt import GDELT, GDELTDataset
from .gindt import GINDataset
from .gnn_benchmark import (
AmazonCoBuy,
AmazonCoBuyComputerDataset,
AmazonCoBuyPhotoDataset,
Coauthor,
CoauthorCSDataset,
CoauthorPhysicsDataset,
CoraFull,
CoraFullDataset,
)
from .icews18 import ICEWS18, ICEWS18Dataset
from .karate import KarateClub, KarateClubDataset
from .knowledge_graph import FB15k237Dataset, FB15kDataset, WN18Dataset
from .minigc import *
from .ppi import LegacyPPIDataset, PPIDataset
from .qm7b import QM7b, QM7bDataset
from .qm9 import QM9, QM9Dataset
from .qm9_edge import QM9Edge, QM9EdgeDataset
from .dgl_dataset import DGLDataset, DGLBuiltinDataset
from .citation_graph import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from .knowledge_graph import FB15k237Dataset, FB15kDataset, WN18Dataset
from .rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from .fraud import FraudDataset, FraudYelpDataset, FraudAmazonDataset
from .fakenews import FakeNewsDataset
from .csv_dataset import CSVDataset
from .adapter import *
from .synthetic import BAShapeDataset, BACommunityDataset, TreeCycleDataset, TreeGridDataset, BA2MotifDataset
from .rdf import AIFBDataset, AMDataset, BGSDataset, MUTAGDataset
from .reddit import RedditDataset
from .sbm import SBMMixture, SBMMixtureDataset
from .synthetic import (
BA2MotifDataset,
BACommunityDataset,
BAShapeDataset,
TreeCycleDataset,
TreeGridDataset,
)
from .tree import SST, SSTDataset
from .tu import LegacyTUDataset, TUDataset
from .utils import *
from .wikics import WikiCSDataset
from .flickr import FlickrDataset
from .yelp import YelpDataset
def register_data_args(parser):
parser.add_argument(
"--dataset",
type=str,
required=False,
help=
"The input dataset. Can be cora, citeseer, pubmed, syn(synthetic dataset) or reddit"
help="The input dataset. Can be cora, citeseer, pubmed, syn(synthetic dataset) or reddit",
)
def load_data(args):
if args.dataset == 'cora':
if args.dataset == "cora":
return citegrh.load_cora()
elif args.dataset == 'citeseer':
elif args.dataset == "citeseer":
return citegrh.load_citeseer()
elif args.dataset == 'pubmed':
elif args.dataset == "pubmed":
return citegrh.load_pubmed()
elif args.dataset is not None and args.dataset.startswith('reddit'):
return RedditDataset(self_loop=('self-loop' in args.dataset))
elif args.dataset is not None and args.dataset.startswith("reddit"):
return RedditDataset(self_loop=("self-loop" in args.dataset))
else:
raise ValueError('Unknown dataset: {}'.format(args.dataset))
raise ValueError("Unknown dataset: {}".format(args.dataset))
"""Dataset adapters for re-purposing a dataset for a different kind of training task."""
import os
import json
import os
import numpy as np
from .. import backend as F
from ..base import DGLError
from ..convert import graph as create_dgl_graph
from ..sampling.negative import _calc_redundancy
from .dgl_dataset import DGLDataset
from . import utils
from ..base import DGLError
from .. import backend as F
from .dgl_dataset import DGLDataset
__all__ = ['AsNodePredDataset', 'AsLinkPredDataset', 'AsGraphPredDataset']
__all__ = ["AsNodePredDataset", "AsLinkPredDataset", "AsGraphPredDataset"]
class AsNodePredDataset(DGLDataset):
......@@ -77,83 +77,118 @@ class AsNodePredDataset(DGLDataset):
True
"""
def __init__(self,
dataset,
split_ratio=None,
target_ntype=None,
**kwargs):
def __init__(self, dataset, split_ratio=None, target_ntype=None, **kwargs):
self.dataset = dataset
self.split_ratio = split_ratio
self.target_ntype = target_ntype
super().__init__(self.dataset.name + '-as-nodepred',
hash_key=(split_ratio, target_ntype, dataset.name, 'nodepred'), **kwargs)
super().__init__(
self.dataset.name + "-as-nodepred",
hash_key=(split_ratio, target_ntype, dataset.name, "nodepred"),
**kwargs
)
def process(self):
is_ogb = hasattr(self.dataset, 'get_idx_split')
is_ogb = hasattr(self.dataset, "get_idx_split")
if is_ogb:
g, label = self.dataset[0]
self.g = g.clone()
self.g.ndata['label'] = F.reshape(label, (g.num_nodes(),))
self.g.ndata["label"] = F.reshape(label, (g.num_nodes(),))
else:
self.g = self.dataset[0].clone()
if 'label' not in self.g.nodes[self.target_ntype].data:
raise ValueError("Missing node labels. Make sure labels are stored "
"under name 'label'.")
if "label" not in self.g.nodes[self.target_ntype].data:
raise ValueError(
"Missing node labels. Make sure labels are stored "
"under name 'label'."
)
if self.split_ratio is None:
if is_ogb:
split = self.dataset.get_idx_split()
train_idx, val_idx, test_idx = split['train'], split['valid'], split['test']
train_idx, val_idx, test_idx = (
split["train"],
split["valid"],
split["test"],
)
n = self.g.num_nodes()
train_mask = utils.generate_mask_tensor(utils.idx2mask(train_idx, n))
val_mask = utils.generate_mask_tensor(utils.idx2mask(val_idx, n))
test_mask = utils.generate_mask_tensor(utils.idx2mask(test_idx, n))
self.g.ndata['train_mask'] = train_mask
self.g.ndata['val_mask'] = val_mask
self.g.ndata['test_mask'] = test_mask
train_mask = utils.generate_mask_tensor(
utils.idx2mask(train_idx, n)
)
val_mask = utils.generate_mask_tensor(
utils.idx2mask(val_idx, n)
)
test_mask = utils.generate_mask_tensor(
utils.idx2mask(test_idx, n)
)
self.g.ndata["train_mask"] = train_mask
self.g.ndata["val_mask"] = val_mask
self.g.ndata["test_mask"] = test_mask
else:
assert "train_mask" in self.g.nodes[self.target_ntype].data, \
"train_mask is not provided, please specify split_ratio to generate the masks"
assert "val_mask" in self.g.nodes[self.target_ntype].data, \
"val_mask is not provided, please specify split_ratio to generate the masks"
assert "test_mask" in self.g.nodes[self.target_ntype].data, \
"test_mask is not provided, please specify split_ratio to generate the masks"
assert (
"train_mask" in self.g.nodes[self.target_ntype].data
), "train_mask is not provided, please specify split_ratio to generate the masks"
assert (
"val_mask" in self.g.nodes[self.target_ntype].data
), "val_mask is not provided, please specify split_ratio to generate the masks"
assert (
"test_mask" in self.g.nodes[self.target_ntype].data
), "test_mask is not provided, please specify split_ratio to generate the masks"
else:
if self.verbose:
print('Generating train/val/test masks...')
print("Generating train/val/test masks...")
utils.add_nodepred_split(self, self.split_ratio, self.target_ntype)
self._set_split_index()
self.num_classes = getattr(self.dataset, 'num_classes', None)
self.num_classes = getattr(self.dataset, "num_classes", None)
if self.num_classes is None:
self.num_classes = len(F.unique(self.g.nodes[self.target_ntype].data['label']))
self.num_classes = len(
F.unique(self.g.nodes[self.target_ntype].data["label"])
)
def has_cache(self):
return os.path.isfile(os.path.join(self.save_path, 'graph_{}.bin'.format(self.hash)))
return os.path.isfile(
os.path.join(self.save_path, "graph_{}.bin".format(self.hash))
)
def load(self):
with open(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)), 'r') as f:
with open(
os.path.join(self.save_path, "info_{}.json".format(self.hash)), "r"
) as f:
info = json.load(f)
if (info['split_ratio'] != self.split_ratio
or info['target_ntype'] != self.target_ntype):
raise ValueError('Provided split ratio is different from the cached file. '
'Re-process the dataset.')
self.split_ratio = info['split_ratio']
self.target_ntype = info['target_ntype']
self.num_classes = info['num_classes']
gs, _ = utils.load_graphs(os.path.join(self.save_path, 'graph_{}.bin'.format(self.hash)))
if (
info["split_ratio"] != self.split_ratio
or info["target_ntype"] != self.target_ntype
):
raise ValueError(
"Provided split ratio is different from the cached file. "
"Re-process the dataset."
)
self.split_ratio = info["split_ratio"]
self.target_ntype = info["target_ntype"]
self.num_classes = info["num_classes"]
gs, _ = utils.load_graphs(
os.path.join(self.save_path, "graph_{}.bin".format(self.hash))
)
self.g = gs[0]
self._set_split_index()
def save(self):
utils.save_graphs(os.path.join(self.save_path, 'graph_{}.bin'.format(self.hash)), [self.g])
with open(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)), 'w') as f:
json.dump({
'split_ratio': self.split_ratio,
'target_ntype': self.target_ntype,
'num_classes': self.num_classes}, f)
utils.save_graphs(
os.path.join(self.save_path, "graph_{}.bin".format(self.hash)),
[self.g],
)
with open(
os.path.join(self.save_path, "info_{}.json".format(self.hash)), "w"
) as f:
json.dump(
{
"split_ratio": self.split_ratio,
"target_ntype": self.target_ntype,
"num_classes": self.num_classes,
},
f,
)
def __getitem__(self, idx):
return self.g
......@@ -164,19 +199,18 @@ class AsNodePredDataset(DGLDataset):
def _set_split_index(self):
"""Add train_idx/val_idx/test_idx as dataset attributes according to corresponding mask."""
ndata = self.g.nodes[self.target_ntype].data
self.train_idx = F.nonzero_1d(ndata['train_mask'])
self.val_idx = F.nonzero_1d(ndata['val_mask'])
self.test_idx = F.nonzero_1d(ndata['test_mask'])
self.train_idx = F.nonzero_1d(ndata["train_mask"])
self.val_idx = F.nonzero_1d(ndata["val_mask"])
self.test_idx = F.nonzero_1d(ndata["test_mask"])
def negative_sample(g, num_samples):
"""Random sample negative edges from graph, excluding self-loops,
the result samples might be less than num_samples
the result samples might be less than num_samples
"""
num_nodes = g.num_nodes()
redundancy = _calc_redundancy(
num_samples, g.num_edges(), num_nodes ** 2)
sample_size = int(num_samples*(1+redundancy))
redundancy = _calc_redundancy(num_samples, g.num_edges(), num_nodes**2)
sample_size = int(num_samples * (1 + redundancy))
edges = np.random.randint(0, num_nodes, size=(2, sample_size))
edges = np.unique(edges, axis=1)
# remove self loop
......@@ -236,49 +270,71 @@ class AsLinkPredDataset(DGLDataset):
True
"""
def __init__(self,
dataset,
split_ratio=None,
neg_ratio=3,
**kwargs):
def __init__(self, dataset, split_ratio=None, neg_ratio=3, **kwargs):
self.g = dataset[0]
self.num_nodes = self.g.num_nodes()
self.dataset = dataset
self.split_ratio = split_ratio
self.neg_ratio = neg_ratio
super().__init__(dataset.name + '-as-linkpred',
hash_key=(neg_ratio, split_ratio, dataset.name, 'linkpred'), **kwargs)
super().__init__(
dataset.name + "-as-linkpred",
hash_key=(neg_ratio, split_ratio, dataset.name, "linkpred"),
**kwargs
)
def process(self):
if self.split_ratio is None:
# Handle logics for OGB link prediction dataset
assert hasattr(self.dataset, "get_edge_split"), \
"dataset doesn't have get_edge_split method, please specify split_ratio and neg_ratio to generate the split"
assert hasattr(
self.dataset, "get_edge_split"
), "dataset doesn't have get_edge_split method, please specify split_ratio and neg_ratio to generate the split"
# This is likely to be an ogb dataset
self.edge_split = self.dataset.get_edge_split()
self._train_graph = self.g
if 'source_node' in self.edge_split["test"]:
if "source_node" in self.edge_split["test"]:
# Probably ogbl-citation2
pos_e = (self.edge_split["valid"]["source_node"], self.edge_split["valid"]["target_node"])
neg_e_size = self.edge_split["valid"]['target_node_neg'].shape[-1]
neg_e_src = np.repeat(self.edge_split['valid']['source_node'], neg_e_size)
neg_e_dst = np.reshape(self.edge_split["valid"]["target_node_neg"], -1)
pos_e = (
self.edge_split["valid"]["source_node"],
self.edge_split["valid"]["target_node"],
)
neg_e_size = self.edge_split["valid"]["target_node_neg"].shape[
-1
]
neg_e_src = np.repeat(
self.edge_split["valid"]["source_node"], neg_e_size
)
neg_e_dst = np.reshape(
self.edge_split["valid"]["target_node_neg"], -1
)
self._val_edges = pos_e, (neg_e_src, neg_e_dst)
pos_e = (self.edge_split["test"]["source_node"], self.edge_split["test"]["target_node"])
neg_e_size = self.edge_split["test"]['target_node_neg'].shape[-1]
neg_e_src = np.repeat(self.edge_split['test']['source_node'], neg_e_size)
neg_e_dst = np.reshape(self.edge_split["test"]["target_node_neg"], -1)
pos_e = (
self.edge_split["test"]["source_node"],
self.edge_split["test"]["target_node"],
)
neg_e_size = self.edge_split["test"]["target_node_neg"].shape[
-1
]
neg_e_src = np.repeat(
self.edge_split["test"]["source_node"], neg_e_size
)
neg_e_dst = np.reshape(
self.edge_split["test"]["target_node_neg"], -1
)
self._test_edges = pos_e, (neg_e_src, neg_e_dst)
elif 'edge' in self.edge_split["test"]:
elif "edge" in self.edge_split["test"]:
# Probably ogbl-collab
pos_e_tensor, neg_e_tensor = self.edge_split["valid"][
"edge"], self.edge_split["valid"]["edge_neg"]
pos_e_tensor, neg_e_tensor = (
self.edge_split["valid"]["edge"],
self.edge_split["valid"]["edge_neg"],
)
pos_e = (pos_e_tensor[:, 0], pos_e_tensor[:, 1])
neg_e = (neg_e_tensor[:, 0], neg_e_tensor[:, 1])
self._val_edges = pos_e, neg_e
pos_e_tensor, neg_e_tensor = self.edge_split["test"][
"edge"], self.edge_split["test"]["edge_neg"]
pos_e_tensor, neg_e_tensor = (
self.edge_split["test"]["edge"],
self.edge_split["test"]["edge_neg"],
)
pos_e = (pos_e_tensor[:, 0], pos_e_tensor[:, 1])
neg_e = (neg_e_tensor[:, 0], neg_e_tensor[:, 1])
self._test_edges = pos_e, neg_e
......@@ -292,40 +348,65 @@ class AsLinkPredDataset(DGLDataset):
n = graph.num_edges()
src, dst = graph.edges()
src, dst = F.asnumpy(src), F.asnumpy(dst)
n_train, n_val, n_test = int(
n * ratio[0]), int(n * ratio[1]), int(n * ratio[2])
n_train, n_val, n_test = (
int(n * ratio[0]),
int(n * ratio[1]),
int(n * ratio[2]),
)
idx = np.random.permutation(n)
train_pos_idx = idx[:n_train]
val_pos_idx = idx[n_train:n_train+n_val]
test_pos_idx = idx[n_train+n_val:]
val_pos_idx = idx[n_train : n_train + n_val]
test_pos_idx = idx[n_train + n_val :]
neg_src, neg_dst = negative_sample(
graph, self.neg_ratio*(n_val+n_test))
neg_n_val, neg_n_test = self.neg_ratio * n_val, self.neg_ratio * n_test
graph, self.neg_ratio * (n_val + n_test)
)
neg_n_val, neg_n_test = (
self.neg_ratio * n_val,
self.neg_ratio * n_test,
)
neg_val_src, neg_val_dst = neg_src[:neg_n_val], neg_dst[:neg_n_val]
neg_test_src, neg_test_dst = neg_src[neg_n_val:], neg_dst[neg_n_val:]
self._val_edges = (F.tensor(src[val_pos_idx]), F.tensor(dst[val_pos_idx])
), (F.tensor(neg_val_src), F.tensor(neg_val_dst))
self._test_edges = (F.tensor(src[test_pos_idx]),
F.tensor(dst[test_pos_idx])), (F.tensor(neg_test_src), F.tensor(neg_test_dst))
neg_test_src, neg_test_dst = (
neg_src[neg_n_val:],
neg_dst[neg_n_val:],
)
self._val_edges = (
F.tensor(src[val_pos_idx]),
F.tensor(dst[val_pos_idx]),
), (F.tensor(neg_val_src), F.tensor(neg_val_dst))
self._test_edges = (
F.tensor(src[test_pos_idx]),
F.tensor(dst[test_pos_idx]),
), (F.tensor(neg_test_src), F.tensor(neg_test_dst))
self._train_graph = create_dgl_graph(
(src[train_pos_idx], dst[train_pos_idx]), num_nodes=self.num_nodes)
(src[train_pos_idx], dst[train_pos_idx]),
num_nodes=self.num_nodes,
)
self._train_graph.ndata["feat"] = graph.ndata["feat"]
def has_cache(self):
return os.path.isfile(os.path.join(self.save_path, 'graph_{}.bin'.format(self.hash)))
return os.path.isfile(
os.path.join(self.save_path, "graph_{}.bin".format(self.hash))
)
def load(self):
gs, tensor_dict = utils.load_graphs(
os.path.join(self.save_path, 'graph_{}.bin'.format(self.hash)))
os.path.join(self.save_path, "graph_{}.bin".format(self.hash))
)
self.g = gs[0]
self._train_graph = self.g
self._val_edges = (tensor_dict["val_pos_src"], tensor_dict["val_pos_dst"]), (
tensor_dict["val_neg_src"], tensor_dict["val_neg_dst"])
self._test_edges = (tensor_dict["test_pos_src"], tensor_dict["test_pos_dst"]), (
tensor_dict["test_neg_src"], tensor_dict["test_neg_dst"])
with open(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)), 'r') as f:
self._val_edges = (
tensor_dict["val_pos_src"],
tensor_dict["val_pos_dst"],
), (tensor_dict["val_neg_src"], tensor_dict["val_neg_dst"])
self._test_edges = (
tensor_dict["test_pos_src"],
tensor_dict["test_pos_dst"],
), (tensor_dict["test_neg_src"], tensor_dict["test_neg_dst"])
with open(
os.path.join(self.save_path, "info_{}.json".format(self.hash)), "r"
) as f:
info = json.load(f)
self.split_ratio = info["split_ratio"]
self.neg_ratio = info["neg_ratio"]
......@@ -341,12 +422,18 @@ class AsLinkPredDataset(DGLDataset):
"test_neg_src": self._test_edges[1][0],
"test_neg_dst": self._test_edges[1][1],
}
utils.save_graphs(os.path.join(self.save_path, 'graph_{}.bin'.format(self.hash)), [
self._train_graph], tensor_dict)
with open(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)), 'w') as f:
json.dump({
'split_ratio': self.split_ratio,
'neg_ratio': self.neg_ratio}, f)
utils.save_graphs(
os.path.join(self.save_path, "graph_{}.bin".format(self.hash)),
[self._train_graph],
tensor_dict,
)
with open(
os.path.join(self.save_path, "info_{}.json".format(self.hash)), "w"
) as f:
json.dump(
{"split_ratio": self.split_ratio, "neg_ratio": self.neg_ratio},
f,
)
@property
def feat_size(self):
......@@ -370,6 +457,7 @@ class AsLinkPredDataset(DGLDataset):
def __len__(self):
return 1
class AsGraphPredDataset(DGLDataset):
"""Repurpose a dataset for standard graph property prediction task.
......@@ -425,23 +513,24 @@ class AsGraphPredDataset(DGLDataset):
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)}), tensor([0]))
"""
def __init__(self,
dataset,
split_ratio=None,
**kwargs):
def __init__(self, dataset, split_ratio=None, **kwargs):
self.dataset = dataset
self.split_ratio = split_ratio
super().__init__(dataset.name + '-as-graphpred',
hash_key=(split_ratio, dataset.name, 'graphpred'), **kwargs)
super().__init__(
dataset.name + "-as-graphpred",
hash_key=(split_ratio, dataset.name, "graphpred"),
**kwargs
)
def process(self):
is_ogb = hasattr(self.dataset, 'get_idx_split')
is_ogb = hasattr(self.dataset, "get_idx_split")
if self.split_ratio is None:
if is_ogb:
split = self.dataset.get_idx_split()
self.train_idx = split['train']
self.val_idx = split['valid']
self.test_idx = split['test']
self.train_idx = split["train"]
self.val_idx = split["valid"]
self.test_idx = split["test"]
else:
# Handle FakeNewsDataset
try:
......@@ -449,11 +538,13 @@ class AsGraphPredDataset(DGLDataset):
self.val_idx = F.nonzero_1d(self.dataset.val_mask)
self.test_idx = F.nonzero_1d(self.dataset.test_mask)
except:
raise DGLError('The input dataset does not have a default train/val/test '
'split. Please specify split_ratio to generate the split.')
raise DGLError(
"The input dataset does not have a default train/val/test "
"split. Please specify split_ratio to generate the split."
)
else:
if self.verbose:
print('Generating train/val/test split...')
print("Generating train/val/test split...")
train_ratio, val_ratio, _ = self.split_ratio
num_graphs = len(self.dataset)
num_train = int(num_graphs * train_ratio)
......@@ -461,10 +552,10 @@ class AsGraphPredDataset(DGLDataset):
idx = np.random.permutation(num_graphs)
self.train_idx = F.tensor(idx[:num_train])
self.val_idx = F.tensor(idx[num_train: num_train + num_val])
self.test_idx = F.tensor(idx[num_train + num_val:])
self.val_idx = F.tensor(idx[num_train : num_train + num_val])
self.test_idx = F.tensor(idx[num_train + num_val :])
if hasattr(self.dataset, 'num_classes'):
if hasattr(self.dataset, "num_classes"):
# GINDataset, MiniGCDataset, FakeNewsDataset, TUDataset,
# LegacyTUDataset, BA2MotifDataset
self.num_classes = self.dataset.num_classes
......@@ -472,42 +563,58 @@ class AsGraphPredDataset(DGLDataset):
# None for multi-label classification and regression
self.num_classes = None
if hasattr(self.dataset, 'num_tasks'):
if hasattr(self.dataset, "num_tasks"):
# OGB datasets
self.num_tasks = self.dataset.num_tasks
else:
self.num_tasks = 1
def has_cache(self):
return os.path.isfile(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)))
return os.path.isfile(
os.path.join(self.save_path, "info_{}.json".format(self.hash))
)
def load(self):
with open(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)), 'r') as f:
with open(
os.path.join(self.save_path, "info_{}.json".format(self.hash)), "r"
) as f:
info = json.load(f)
if info['split_ratio'] != self.split_ratio:
raise ValueError('Provided split ratio is different from the cached file. '
'Re-process the dataset.')
self.split_ratio = info['split_ratio']
self.num_tasks = info['num_tasks']
self.num_classes = info['num_classes']
split = np.load(os.path.join(self.save_path, 'split_{}.npz'.format(self.hash)))
self.train_idx = F.zerocopy_from_numpy(split['train_idx'])
self.val_idx = F.zerocopy_from_numpy(split['val_idx'])
self.test_idx = F.zerocopy_from_numpy(split['test_idx'])
if info["split_ratio"] != self.split_ratio:
raise ValueError(
"Provided split ratio is different from the cached file. "
"Re-process the dataset."
)
self.split_ratio = info["split_ratio"]
self.num_tasks = info["num_tasks"]
self.num_classes = info["num_classes"]
split = np.load(
os.path.join(self.save_path, "split_{}.npz".format(self.hash))
)
self.train_idx = F.zerocopy_from_numpy(split["train_idx"])
self.val_idx = F.zerocopy_from_numpy(split["val_idx"])
self.test_idx = F.zerocopy_from_numpy(split["test_idx"])
def save(self):
if not os.path.exists(self.save_path):
os.makedirs(self.save_path)
with open(os.path.join(self.save_path, 'info_{}.json'.format(self.hash)), 'w') as f:
json.dump({
'split_ratio': self.split_ratio,
'num_tasks': self.num_tasks,
'num_classes': self.num_classes}, f)
np.savez(os.path.join(self.save_path, 'split_{}.npz'.format(self.hash)),
train_idx=F.zerocopy_to_numpy(self.train_idx),
val_idx=F.zerocopy_to_numpy(self.val_idx),
test_idx=F.zerocopy_to_numpy(self.test_idx))
with open(
os.path.join(self.save_path, "info_{}.json".format(self.hash)), "w"
) as f:
json.dump(
{
"split_ratio": self.split_ratio,
"num_tasks": self.num_tasks,
"num_classes": self.num_classes,
},
f,
)
np.savez(
os.path.join(self.save_path, "split_{}.npz".format(self.hash)),
train_idx=F.zerocopy_to_numpy(self.train_idx),
val_idx=F.zerocopy_to_numpy(self.val_idx),
test_idx=F.zerocopy_to_numpy(self.test_idx),
)
def __getitem__(self, idx):
return self.dataset[idx]
......@@ -518,9 +625,9 @@ class AsGraphPredDataset(DGLDataset):
@property
def node_feat_size(self):
g = self[0][0]
return g.ndata['feat'].shape[-1] if 'feat' in g.ndata else None
return g.ndata["feat"].shape[-1] if "feat" in g.ndata else None
@property
def edge_feat_size(self):
g = self[0][0]
return g.edata['feat'].shape[-1] if 'feat' in g.edata else None
return g.edata["feat"].shape[-1] if "feat" in g.edata else None
""" BitcoinOTC dataset for fraud detection """
import numpy as np
import os
import datetime
import gzip
import os
import shutil
from .dgl_dataset import DGLBuiltinDataset
from .utils import download, makedirs, save_graphs, load_graphs, check_sha1
from ..convert import graph as dgl_graph
import numpy as np
from .. import backend as F
from ..convert import graph as dgl_graph
from .dgl_dataset import DGLBuiltinDataset
from .utils import check_sha1, download, load_graphs, makedirs, save_graphs
class BitcoinOTCDataset(DGLBuiltinDataset):
......@@ -68,35 +69,44 @@ class BitcoinOTCDataset(DGLBuiltinDataset):
>>>
"""
_url = 'https://snap.stanford.edu/data/soc-sign-bitcoinotc.csv.gz'
_sha1_str = 'c14281f9e252de0bd0b5f1c6e2bae03123938641'
def __init__(self, raw_dir=None, force_reload=False, verbose=False, transform=None):
super(BitcoinOTCDataset, self).__init__(name='bitcoinotc',
url=self._url,
raw_dir=raw_dir,
force_reload=force_reload,
verbose=verbose,
transform=transform)
_url = "https://snap.stanford.edu/data/soc-sign-bitcoinotc.csv.gz"
_sha1_str = "c14281f9e252de0bd0b5f1c6e2bae03123938641"
def __init__(
self, raw_dir=None, force_reload=False, verbose=False, transform=None
):
super(BitcoinOTCDataset, self).__init__(
name="bitcoinotc",
url=self._url,
raw_dir=raw_dir,
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
def download(self):
gz_file_path = os.path.join(self.raw_dir, self.name + '.csv.gz')
gz_file_path = os.path.join(self.raw_dir, self.name + ".csv.gz")
download(self.url, path=gz_file_path)
if not check_sha1(gz_file_path, self._sha1_str):
raise UserWarning('File {} is downloaded but the content hash does not match. '
'The repo may be outdated or download may be incomplete. '
'Otherwise you can create an issue for it.'.format(self.name + '.csv.gz'))
raise UserWarning(
"File {} is downloaded but the content hash does not match."
"The repo may be outdated or download may be incomplete. "
"Otherwise you can create an issue for it.".format(
self.name + ".csv.gz"
)
)
self._extract_gz(gz_file_path, self.raw_path)
def process(self):
filename = os.path.join(self.save_path, self.name + '.csv')
data = np.loadtxt(filename, delimiter=',').astype(np.int64)
filename = os.path.join(self.save_path, self.name + ".csv")
data = np.loadtxt(filename, delimiter=",").astype(np.int64)
data[:, 0:2] = data[:, 0:2] - data[:, 0:2].min()
delta = datetime.timedelta(days=14).total_seconds()
# The source code is not released, but the paper indicates there are
# 137 samples in total. The cutoff below yields exactly 137 samples.
time_index = np.around(
(data[:, 3] - data[:, 3].min()) / delta).astype(np.int64)
time_index = np.around((data[:, 3] - data[:, 3].min()) / delta).astype(
np.int64
)
self._graphs = []
for i in range(time_index.max()):
......@@ -104,19 +114,21 @@ class BitcoinOTCDataset(DGLBuiltinDataset):
edges = data[row_mask][:, 0:2]
rate = data[row_mask][:, 2]
g = dgl_graph((edges[:, 0], edges[:, 1]))
g.edata['h'] = F.tensor(rate.reshape(-1, 1), dtype=F.data_type_dict['int64'])
g.edata["h"] = F.tensor(
rate.reshape(-1, 1), dtype=F.data_type_dict["int64"]
)
self._graphs.append(g)
def has_cache(self):
graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
graph_path = os.path.join(self.save_path, "dgl_graph.bin")
return os.path.exists(graph_path)
def save(self):
graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
graph_path = os.path.join(self.save_path, "dgl_graph.bin")
save_graphs(graph_path, self.graphs)
def load(self):
graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
graph_path = os.path.join(self.save_path, "dgl_graph.bin")
self._graphs = load_graphs(graph_path)[0]
@property
......@@ -124,7 +136,7 @@ class BitcoinOTCDataset(DGLBuiltinDataset):
return self._graphs
def __len__(self):
r""" Number of graphs in the dataset.
r"""Number of graphs in the dataset.
Return
-------
......@@ -133,7 +145,7 @@ class BitcoinOTCDataset(DGLBuiltinDataset):
return len(self.graphs)
def __getitem__(self, item):
r""" Get graph by index
r"""Get graph by index
Parameters
----------
......@@ -155,7 +167,7 @@ class BitcoinOTCDataset(DGLBuiltinDataset):
@property
def is_temporal(self):
r""" Are the graphs temporal graphs
r"""Are the graphs temporal graphs
Returns
-------
......@@ -166,12 +178,12 @@ class BitcoinOTCDataset(DGLBuiltinDataset):
def _extract_gz(self, file, target_dir, overwrite=False):
if os.path.exists(target_dir) and not overwrite:
return
print('Extracting file to {}'.format(target_dir))
print("Extracting file to {}".format(target_dir))
fname = os.path.basename(file)
makedirs(target_dir)
out_file_path = os.path.join(target_dir, fname[:-3])
with gzip.open(file, 'rb') as f_in:
with open(out_file_path, 'wb') as f_out:
with gzip.open(file, "rb") as f_in:
with open(out_file_path, "wb") as f_out:
shutil.copyfileobj(f_in, f_out)
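# Editor's note: illustrative usage (not part of this commit) of the dataset class above;
# a minimal sketch, assuming the SNAP download succeeds:
from dgl.data import BitcoinOTCDataset

dataset = BitcoinOTCDataset()
print(len(dataset))         # number of 14-day snapshot graphs (137 per the comment above)
g = dataset[0]              # __getitem__ returns one snapshot graph
print(g.edata["h"].shape)   # per-edge trust ratings stored by process()
print(dataset.is_temporal)  # the snapshots form a temporal sequence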
......
import os
import numpy as np
from .dgl_dataset import DGLDataset
from .utils import save_graphs, load_graphs, Subset
from .. import backend as F
from ..base import DGLError
from .dgl_dataset import DGLDataset
from .utils import Subset, load_graphs, save_graphs
class CSVDataset(DGLDataset):
......@@ -65,11 +67,24 @@ class CSVDataset(DGLDataset):
Please refer to :ref:`guide-data-pipeline-loadcsv`.
"""
META_YAML_NAME = 'meta.yaml'
def __init__(self, data_path, force_reload=False, verbose=True, ndata_parser=None,
edata_parser=None, gdata_parser=None, transform=None):
from .csv_dataset_base import load_yaml_with_sanity_check, DefaultDataParser
META_YAML_NAME = "meta.yaml"
def __init__(
self,
data_path,
force_reload=False,
verbose=True,
ndata_parser=None,
edata_parser=None,
gdata_parser=None,
transform=None,
):
from .csv_dataset_base import (
DefaultDataParser,
load_yaml_with_sanity_check,
)
self.graphs = None
self.data = None
self.ndata_parser = {} if ndata_parser is None else ndata_parser
......@@ -79,17 +94,29 @@ class CSVDataset(DGLDataset):
meta_yaml_path = os.path.join(data_path, CSVDataset.META_YAML_NAME)
if not os.path.exists(meta_yaml_path):
raise DGLError(
"'{}' cannot be found under {}.".format(CSVDataset.META_YAML_NAME, data_path))
"'{}' cannot be found under {}.".format(
CSVDataset.META_YAML_NAME, data_path
)
)
self.meta_yaml = load_yaml_with_sanity_check(meta_yaml_path)
ds_name = self.meta_yaml.dataset_name
super().__init__(ds_name, raw_dir=os.path.dirname(
meta_yaml_path), force_reload=force_reload, verbose=verbose, transform=transform)
super().__init__(
ds_name,
raw_dir=os.path.dirname(meta_yaml_path),
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
def process(self):
"""Parse node/edge data from CSV files and construct DGL.Graphs
"""
from .csv_dataset_base import NodeData, EdgeData, GraphData, DGLGraphConstructor
"""Parse node/edge data from CSV files and construct DGL.Graphs"""
from .csv_dataset_base import (
DGLGraphConstructor,
EdgeData,
GraphData,
NodeData,
)
meta_yaml = self.meta_yaml
base_dir = self.raw_dir
node_data = []
......@@ -97,36 +124,58 @@ class CSVDataset(DGLDataset):
if meta_node is None:
continue
ntype = meta_node.ntype
data_parser = self.ndata_parser if callable(
self.ndata_parser) else self.ndata_parser.get(ntype, self.default_data_parser)
data_parser = (
self.ndata_parser
if callable(self.ndata_parser)
else self.ndata_parser.get(ntype, self.default_data_parser)
)
ndata = NodeData.load_from_csv(
meta_node, base_dir=base_dir, separator=meta_yaml.separator, data_parser=data_parser)
meta_node,
base_dir=base_dir,
separator=meta_yaml.separator,
data_parser=data_parser,
)
node_data.append(ndata)
edge_data = []
for meta_edge in meta_yaml.edge_data:
if meta_edge is None:
continue
etype = tuple(meta_edge.etype)
data_parser = self.edata_parser if callable(
self.edata_parser) else self.edata_parser.get(etype, self.default_data_parser)
data_parser = (
self.edata_parser
if callable(self.edata_parser)
else self.edata_parser.get(etype, self.default_data_parser)
)
edata = EdgeData.load_from_csv(
meta_edge, base_dir=base_dir, separator=meta_yaml.separator, data_parser=data_parser)
meta_edge,
base_dir=base_dir,
separator=meta_yaml.separator,
data_parser=data_parser,
)
edge_data.append(edata)
graph_data = None
if meta_yaml.graph_data is not None:
meta_graph = meta_yaml.graph_data
data_parser = self.default_data_parser if self.gdata_parser is None else self.gdata_parser
data_parser = (
self.default_data_parser
if self.gdata_parser is None
else self.gdata_parser
)
graph_data = GraphData.load_from_csv(
meta_graph, base_dir=base_dir, separator=meta_yaml.separator, data_parser=data_parser)
meta_graph,
base_dir=base_dir,
separator=meta_yaml.separator,
data_parser=data_parser,
)
# construct graphs
self.graphs, self.data = DGLGraphConstructor.construct_graphs(
node_data, edge_data, graph_data)
node_data, edge_data, graph_data
)
if len(self.data) == 1:
self.labels = list(self.data.values())[0]
def has_cache(self):
graph_path = os.path.join(self.save_path,
self.name + '.bin')
graph_path = os.path.join(self.save_path, self.name + ".bin")
if os.path.exists(graph_path):
return True
......@@ -135,14 +184,11 @@ class CSVDataset(DGLDataset):
def save(self):
if self.graphs is None:
raise DGLError("No graphs available in dataset")
graph_path = os.path.join(self.save_path,
self.name + '.bin')
save_graphs(graph_path, self.graphs,
labels=self.data)
graph_path = os.path.join(self.save_path, self.name + ".bin")
save_graphs(graph_path, self.graphs, labels=self.data)
def load(self):
graph_path = os.path.join(self.save_path,
self.name + '.bin')
graph_path = os.path.join(self.save_path, self.name + ".bin")
self.graphs, self.data = load_graphs(graph_path)
if len(self.data) == 1:
self.labels = list(self.data.values())[0]
......
import ast
import os
from typing import Callable, List, Optional
import numpy as np
from typing import List, Optional, Callable
from .. import backend as F
from ..convert import heterograph as dgl_heterograph
from ..base import dgl_warning, DGLError
import ast
import pydantic as dt
import pandas as pd
import pydantic as dt
import yaml
from .. import backend as F
from ..base import DGLError, dgl_warning
from ..convert import heterograph as dgl_heterograph
class MetaNode(dt.BaseModel):
""" Class of node_data in YAML. Internal use only. """
"""Class of node_data in YAML. Internal use only."""
file_name: str
ntype: Optional[str] = '_V'
graph_id_field: Optional[str] = 'graph_id'
node_id_field: Optional[str] = 'node_id'
ntype: Optional[str] = "_V"
graph_id_field: Optional[str] = "graph_id"
node_id_field: Optional[str] = "node_id"
class MetaEdge(dt.BaseModel):
""" Class of edge_data in YAML. Internal use only. """
"""Class of edge_data in YAML. Internal use only."""
file_name: str
etype: Optional[List[str]] = ['_V', '_E', '_V']
graph_id_field: Optional[str] = 'graph_id'
src_id_field: Optional[str] = 'src_id'
dst_id_field: Optional[str] = 'dst_id'
etype: Optional[List[str]] = ["_V", "_E", "_V"]
graph_id_field: Optional[str] = "graph_id"
src_id_field: Optional[str] = "src_id"
dst_id_field: Optional[str] = "dst_id"
class MetaGraph(dt.BaseModel):
""" Class of graph_data in YAML. Internal use only. """
"""Class of graph_data in YAML. Internal use only."""
file_name: str
graph_id_field: Optional[str] = 'graph_id'
graph_id_field: Optional[str] = "graph_id"
class MetaYaml(dt.BaseModel):
""" Class of YAML. Internal use only. """
version: Optional[str] = '1.0.0'
"""Class of YAML. Internal use only."""
version: Optional[str] = "1.0.0"
dataset_name: str
separator: Optional[str] = ','
separator: Optional[str] = ","
node_data: List[MetaNode]
edge_data: List[MetaEdge]
graph_data: Optional[MetaGraph] = None
def load_yaml_with_sanity_check(yaml_file):
""" Load yaml and do sanity check. Internal use only. """
"""Load yaml and do sanity check. Internal use only."""
with open(yaml_file) as f:
yaml_data = yaml.load(f, Loader=yaml.loader.SafeLoader)
try:
meta_yaml = MetaYaml(**yaml_data)
except dt.ValidationError as e:
print(
"Details of pydantic.ValidationError:\n{}".format(e.json()))
print("Details of pydantic.ValidationError:\n{}".format(e.json()))
raise DGLError(
"Validation Error for YAML fields. Details are shown above."
)
if meta_yaml.version != "1.0.0":
raise DGLError(
"Validation Error for YAML fields. Details are shown above.")
if meta_yaml.version != '1.0.0':
raise DGLError("Invalid CSVDataset version {}. Supported versions: '1.0.0'".format(
meta_yaml.version))
"Invalid CSVDataset version {}. Supported versions: '1.0.0'".format(
meta_yaml.version
)
)
ntypes = [meta.ntype for meta in meta_yaml.node_data]
if len(ntypes) > len(set(ntypes)):
raise DGLError(
"Each node CSV file must have a unique node type name, but found duplicate node type: {}.".format(ntypes))
"Each node CSV file must have a unique node type name, but found duplicate node type: {}.".format(
ntypes
)
)
etypes = [tuple(meta.etype) for meta in meta_yaml.edge_data]
if len(etypes) > len(set(etypes)):
raise DGLError(
"Each edge CSV file must have a unique edge type name, but found duplicate edge type: {}.".format(etypes))
"Each edge CSV file must have a unique edge type name, but found duplicate edge type: {}.".format(
etypes
)
)
return meta_yaml
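# Editor's note: illustrative meta.yaml (not part of this commit) matching the pydantic
# schema above (MetaYaml/MetaNode/MetaEdge/MetaGraph); the dataset name and file names
# are made up for illustration.
EXAMPLE_META_YAML = """
version: 1.0.0
dataset_name: toy_csv_dataset
separator: ','
node_data:
  - file_name: nodes.csv    # MetaNode: ntype/node_id_field default to '_V'/'node_id'
edge_data:
  - file_name: edges.csv    # MetaEdge: src_id_field/dst_id_field default to src_id/dst_id
graph_data:
  file_name: graphs.csv     # optional MetaGraph block for graph-level data
"""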
......@@ -74,7 +89,10 @@ def _validate_data_length(data_dict):
res = lst.count(lst[0]) == len(lst)
if not res:
raise DGLError(
"All data are required to have same length while some of them does not. Length of data={}".format(str(len_dict)))
"All data are required to have same length while some of them does not. Length of data={}".format(
str(len_dict)
)
)
def _tensor(data, dtype=None):
......@@ -86,8 +104,10 @@ def _tensor(data, dtype=None):
ret = F.tensor(ret, dtype=F.float32)
return ret
class BaseData:
""" Class of base data which is inherited by Node/Edge/GraphData. Internal use only. """
"""Class of base data which is inherited by Node/Edge/GraphData. Internal use only."""
@staticmethod
def read_csv(file_name, base_dir, separator):
csv_path = file_name
......@@ -106,31 +126,40 @@ class BaseData:
class NodeData(BaseData):
""" Class of node data which is used for DGLGraph construction. Internal use only. """
"""Class of node data which is used for DGLGraph construction. Internal use only."""
def __init__(self, node_id, data, type=None, graph_id=None):
self.id = np.array(node_id)
self.data = data
self.type = type if type is not None else '_V'
self.graph_id = np.array(
graph_id) if graph_id is not None else np.full(len(node_id), 0)
self.type = type if type is not None else "_V"
self.graph_id = (
np.array(graph_id)
if graph_id is not None
else np.full(len(node_id), 0)
)
_validate_data_length(
{**{'id': self.id, 'graph_id': self.graph_id}, **self.data})
{**{"id": self.id, "graph_id": self.graph_id}, **self.data}
)
@staticmethod
def load_from_csv(meta: MetaNode, data_parser: Callable, base_dir=None, separator=','):
def load_from_csv(
meta: MetaNode, data_parser: Callable, base_dir=None, separator=","
):
df = BaseData.read_csv(meta.file_name, base_dir, separator)
node_ids = BaseData.pop_from_dataframe(df, meta.node_id_field)
graph_ids = BaseData.pop_from_dataframe(df, meta.graph_id_field)
if node_ids is None:
raise DGLError("Missing node id field [{}] in file [{}].".format(
meta.node_id_field, meta.file_name))
raise DGLError(
"Missing node id field [{}] in file [{}].".format(
meta.node_id_field, meta.file_name
)
)
ntype = meta.ntype
ndata = data_parser(df)
return NodeData(node_ids, ndata, type=ntype, graph_id=graph_ids)
@staticmethod
def to_dict(node_data: List['NodeData']) -> dict:
def to_dict(node_data: List["NodeData"]) -> dict:
# node_ids could be numeric or non-numeric values, but duplication is not allowed.
node_dict = {}
for n_data in node_data:
......@@ -139,112 +168,159 @@ class NodeData(BaseData):
idx = n_data.graph_id == graph_id
ids = n_data.id[idx]
u_ids, u_indices, u_counts = np.unique(
ids, return_index=True, return_counts=True)
ids, return_index=True, return_counts=True
)
if len(ids) > len(u_ids):
raise DGLError("Node IDs are required to be unique but the following ids are duplicate: {}".format(
u_ids[u_counts > 1]))
raise DGLError(
"Node IDs are required to be unique but the following ids are duplicate: {}".format(
u_ids[u_counts > 1]
)
)
if graph_id not in node_dict:
node_dict[graph_id] = {}
node_dict[graph_id][n_data.type] = {'mapping': {index: i for i,
index in enumerate(ids[u_indices])},
'data': {k: _tensor(v[idx][u_indices])
for k, v in n_data.data.items()},
'dtype': ids.dtype}
node_dict[graph_id][n_data.type] = {
"mapping": {
index: i for i, index in enumerate(ids[u_indices])
},
"data": {
k: _tensor(v[idx][u_indices])
for k, v in n_data.data.items()
},
"dtype": ids.dtype,
}
return node_dict
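# Editor's note: a tiny standalone sketch (not part of this commit) of the id-compaction
# step performed in NodeData.to_dict() above: raw, possibly non-contiguous node ids from
# the CSV are mapped to the 0..N-1 positions that DGLGraph construction expects.
import numpy as np

raw_ids = np.array([10, 42, 7])                    # ids as they appear in the CSV
u_ids, u_indices = np.unique(raw_ids, return_index=True)
mapping = {raw: i for i, raw in enumerate(raw_ids[u_indices])}
print(mapping)  # {7: 0, 10: 1, 42: 2}; edge src/dst ids are later remapped through this dict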
class EdgeData(BaseData):
""" Class of edge data which is used for DGLGraph construction. Internal use only. """
"""Class of edge data which is used for DGLGraph construction. Internal use only."""
def __init__(self, src_id, dst_id, data, type=None, graph_id=None):
self.src = np.array(src_id)
self.dst = np.array(dst_id)
self.data = data
self.type = type if type is not None else ('_V', '_E', '_V')
self.graph_id = np.array(
graph_id) if graph_id is not None else np.full(len(src_id), 0)
self.type = type if type is not None else ("_V", "_E", "_V")
self.graph_id = (
np.array(graph_id)
if graph_id is not None
else np.full(len(src_id), 0)
)
_validate_data_length(
{**{'src': self.src, 'dst': self.dst, 'graph_id': self.graph_id}, **self.data})
{
**{"src": self.src, "dst": self.dst, "graph_id": self.graph_id},
**self.data,
}
)
@staticmethod
def load_from_csv(meta: MetaEdge, data_parser: Callable, base_dir=None, separator=','):
def load_from_csv(
meta: MetaEdge, data_parser: Callable, base_dir=None, separator=","
):
df = BaseData.read_csv(meta.file_name, base_dir, separator)
src_ids = BaseData.pop_from_dataframe(df, meta.src_id_field)
if src_ids is None:
raise DGLError("Missing src id field [{}] in file [{}].".format(
meta.src_id_field, meta.file_name))
raise DGLError(
"Missing src id field [{}] in file [{}].".format(
meta.src_id_field, meta.file_name
)
)
dst_ids = BaseData.pop_from_dataframe(df, meta.dst_id_field)
if dst_ids is None:
raise DGLError("Missing dst id field [{}] in file [{}].".format(
meta.dst_id_field, meta.file_name))
raise DGLError(
"Missing dst id field [{}] in file [{}].".format(
meta.dst_id_field, meta.file_name
)
)
graph_ids = BaseData.pop_from_dataframe(df, meta.graph_id_field)
etype = tuple(meta.etype)
edata = data_parser(df)
return EdgeData(src_ids, dst_ids, edata, type=etype, graph_id=graph_ids)
@staticmethod
def to_dict(edge_data: List['EdgeData'], node_dict: dict) -> dict:
def to_dict(edge_data: List["EdgeData"], node_dict: dict) -> dict:
edge_dict = {}
for e_data in edge_data:
(src_type, e_type, dst_type) = e_data.type
graph_ids = np.unique(e_data.graph_id)
for graph_id in graph_ids:
if graph_id in edge_dict and e_data.type in edge_dict[graph_id]:
raise DGLError(f"Duplicate edge type[{e_data.type}] for same graph[{graph_id}], please place the same edge_type for same graph into single EdgeData.")
raise DGLError(
f"Duplicate edge type[{e_data.type}] for same graph[{graph_id}], please place the same edge_type for same graph into single EdgeData."
)
idx = e_data.graph_id == graph_id
src_mapping = node_dict[graph_id][src_type]['mapping']
dst_mapping = node_dict[graph_id][dst_type]['mapping']
orig_src_ids = e_data.src[idx].astype(node_dict[graph_id][src_type]['dtype'])
orig_dst_ids = e_data.dst[idx].astype(node_dict[graph_id][dst_type]['dtype'])
src_mapping = node_dict[graph_id][src_type]["mapping"]
dst_mapping = node_dict[graph_id][dst_type]["mapping"]
orig_src_ids = e_data.src[idx].astype(
node_dict[graph_id][src_type]["dtype"]
)
orig_dst_ids = e_data.dst[idx].astype(
node_dict[graph_id][dst_type]["dtype"]
)
src_ids = [src_mapping[index] for index in orig_src_ids]
dst_ids = [dst_mapping[index] for index in orig_dst_ids]
if graph_id not in edge_dict:
edge_dict[graph_id] = {}
edge_dict[graph_id][e_data.type] = {'edges': (_tensor(src_ids), _tensor(dst_ids)),
'data': {k: _tensor(v[idx])
for k, v in e_data.data.items()}}
edge_dict[graph_id][e_data.type] = {
"edges": (_tensor(src_ids), _tensor(dst_ids)),
"data": {
k: _tensor(v[idx]) for k, v in e_data.data.items()
},
}
return edge_dict
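A small standalone sketch (toy data) of the remapping step above: raw src/dst IDs are translated through the per-type 'mapping' built by NodeData.to_dict into the contiguous node indices the graph constructor expects.

import numpy as np

src_mapping = {100: 0, 200: 1, 300: 2}  # toy 'mapping' entries from the node dict
dst_mapping = {100: 0, 200: 1, 300: 2}
orig_src_ids = np.array([100, 300, 200])
orig_dst_ids = np.array([200, 100, 300])
src_ids = [src_mapping[index] for index in orig_src_ids]  # -> [0, 2, 1]
dst_ids = [dst_mapping[index] for index in orig_dst_ids]  # -> [1, 0, 2]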
class GraphData(BaseData):
""" Class of graph data which is used for DGLGraph construction. Internal use only. """
"""Class of graph data which is used for DGLGraph construction. Internal use only."""
def __init__(self, graph_id, data):
self.graph_id = np.array(graph_id)
self.data = data
_validate_data_length({**{'graph_id': self.graph_id}, **self.data})
_validate_data_length({**{"graph_id": self.graph_id}, **self.data})
@staticmethod
def load_from_csv(meta: MetaGraph, data_parser: Callable, base_dir=None, separator=','):
def load_from_csv(
meta: MetaGraph, data_parser: Callable, base_dir=None, separator=","
):
df = BaseData.read_csv(meta.file_name, base_dir, separator)
graph_ids = BaseData.pop_from_dataframe(df, meta.graph_id_field)
if graph_ids is None:
raise DGLError("Missing graph id field [{}] in file [{}].".format(
meta.graph_id_field, meta.file_name))
raise DGLError(
"Missing graph id field [{}] in file [{}].".format(
meta.graph_id_field, meta.file_name
)
)
gdata = data_parser(df)
return GraphData(graph_ids, gdata)
@staticmethod
def to_dict(graph_data: 'GraphData', graphs_dict: dict) -> dict:
def to_dict(graph_data: "GraphData", graphs_dict: dict) -> dict:
missing_ids = np.setdiff1d(
np.array(list(graphs_dict.keys())), graph_data.graph_id)
np.array(list(graphs_dict.keys())), graph_data.graph_id
)
if len(missing_ids) > 0:
raise DGLError(
"Found following graph ids in node/edge CSVs but not in graph CSV: {}.".format(missing_ids))
"Found following graph ids in node/edge CSVs but not in graph CSV: {}.".format(
missing_ids
)
)
graph_ids = graph_data.graph_id
graphs = []
for graph_id in graph_ids:
if graph_id not in graphs_dict:
graphs_dict[graph_id] = dgl_heterograph(
{('_V', '_E', '_V'): ([], [])})
{("_V", "_E", "_V"): ([], [])}
)
for graph_id in graph_ids:
graphs.append(graphs_dict[graph_id])
data = {k: F.reshape(_tensor(v), (len(graphs), -1)) for k, v in graph_data.data.items()}
data = {
k: F.reshape(_tensor(v), (len(graphs), -1))
for k, v in graph_data.data.items()
}
return graphs, data
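A numpy-only sketch (toy arrays) of the reshape above: each graph-level column is turned into one row per graph, mirroring F.reshape(_tensor(v), (len(graphs), -1)).

import numpy as np

num_graphs = 3
graph_data = {
    "label": np.array([0, 1, 0]),            # one scalar per graph
    "feat": np.arange(6, dtype=np.float32),  # two values per graph, flattened
}
data = {k: v.reshape(num_graphs, -1) for k, v in graph_data.items()}
# data["label"].shape == (3, 1), data["feat"].shape == (3, 2)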
class DGLGraphConstructor:
""" Class for constructing DGLGraph from Node/Edge/Graph data. Internal use only. """
"""Class for constructing DGLGraph from Node/Edge/Graph data. Internal use only."""
@staticmethod
def construct_graphs(node_data, edge_data, graph_data=None):
if not isinstance(node_data, list):
......@@ -253,12 +329,10 @@ class DGLGraphConstructor:
edge_data = [edge_data]
node_dict = NodeData.to_dict(node_data)
edge_dict = EdgeData.to_dict(edge_data, node_dict)
graph_dict = DGLGraphConstructor._construct_graphs(
node_dict, edge_dict)
graph_dict = DGLGraphConstructor._construct_graphs(node_dict, edge_dict)
if graph_data is None:
graph_data = GraphData(np.full(1, 0), {})
graphs, data = GraphData.to_dict(
graph_data, graph_dict)
graphs, data = GraphData.to_dict(graph_data, graph_dict)
return graphs, data
@staticmethod
......@@ -266,40 +340,47 @@ class DGLGraphConstructor:
graph_dict = {}
for graph_id in node_dict:
if graph_id not in edge_dict:
edge_dict[graph_id][('_V', '_E', '_V')] = {'edges': ([], [])}
graph = dgl_heterograph({etype: edata['edges']
for etype, edata in edge_dict[graph_id].items()},
num_nodes_dict={ntype: len(ndata['mapping'])
for ntype, ndata in node_dict[graph_id].items()})
edge_dict[graph_id][("_V", "_E", "_V")] = {"edges": ([], [])}
graph = dgl_heterograph(
{
etype: edata["edges"]
for etype, edata in edge_dict[graph_id].items()
},
num_nodes_dict={
ntype: len(ndata["mapping"])
for ntype, ndata in node_dict[graph_id].items()
},
)
def assign_data(type, src_data, dst_data):
for key, value in src_data.items():
dst_data[type].data[key] = value
for type, data in node_dict[graph_id].items():
assign_data(type, data['data'], graph.nodes)
assign_data(type, data["data"], graph.nodes)
for (type), data in edge_dict[graph_id].items():
assign_data(type, data['data'], graph.edges)
assign_data(type, data["data"], graph.edges)
graph_dict[graph_id] = graph
return graph_dict
class DefaultDataParser:
""" Default data parser for CSVDataset. It
1. ignores any columns which does not have a header.
2. tries to convert to list of numeric values(generated by
np.array().tolist()) if cell data is a str separated by ','.
3. read data and infer data type directly, otherwise.
"""Default data parser for CSVDataset. It
1. ignores any columns which does not have a header.
2. tries to convert to list of numeric values(generated by
np.array().tolist()) if cell data is a str separated by ','.
3. read data and infer data type directly, otherwise.
"""
def __call__(self, df: pd.DataFrame):
data = {}
for header in df:
if 'Unnamed' in header:
if "Unnamed" in header:
dgl_warning("Unamed column is found. Ignored...")
continue
dt = df[header].to_numpy().squeeze()
if len(dt) > 0 and isinstance(dt[0], str):
#probably consists of list of numeric values
# probably consists of list of numeric values
dt = np.array([ast.literal_eval(row) for row in dt])
data[header] = dt
return data
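To illustrate step 2 of the docstring above, a minimal sketch (toy cell strings) of how a comma-separated cell is parsed with ast.literal_eval, just as in the loop:

import ast
import numpy as np

cells = ["0.1,0.2,0.3", "0.4,0.5,0.6"]  # toy string cells from one CSV column
dt = np.array([ast.literal_eval(row) for row in cells])
# dt.shape == (2, 3): each cell string became a row of floats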
......@@ -3,11 +3,15 @@
from __future__ import absolute_import
import os, sys, hashlib
import traceback
import abc
from .utils import download, extract_archive, get_download_dir, makedirs
import hashlib
import os
import sys
import traceback
from ..utils import retry_method_with_fix
from .utils import download, extract_archive, get_download_dir, makedirs
class DGLDataset(object):
r"""The basic DGL dataset for creating graph datasets.
......@@ -75,8 +79,18 @@ class DGLDataset(object):
hash : str
Hash value for the dataset and the setting.
"""
def __init__(self, name, url=None, raw_dir=None, save_dir=None,
hash_key=(), force_reload=False, verbose=False, transform=None):
def __init__(
self,
name,
url=None,
raw_dir=None,
save_dir=None,
hash_key=(),
force_reload=False,
verbose=False,
transform=None,
):
self._name = name
self._url = url
self._force_reload = force_reload
......@@ -131,8 +145,7 @@ class DGLDataset(object):
@abc.abstractmethod
def process(self):
r"""Overwrite to realize your own logic of processing the input data.
"""
r"""Overwrite to realize your own logic of processing the input data."""
pass
def has_cache(self):
......@@ -177,21 +190,21 @@ class DGLDataset(object):
try:
self.load()
if self.verbose:
print('Done loading data from cached files.')
print("Done loading data from cached files.")
except KeyboardInterrupt:
raise
except:
load_flag = False
if self.verbose:
print(traceback.format_exc())
print('Loading from cache failed, re-processing.')
print("Loading from cache failed, re-processing.")
if not load_flag:
self._download()
self.process()
self.save()
if self.verbose:
print('Done saving data into cached files.')
print("Done saving data into cached files.")
def _get_hash(self):
"""Compute the hash of the input tuple
......@@ -205,62 +218,54 @@ class DGLDataset(object):
'a770b222'
"""
hash_func = hashlib.sha1()
hash_func.update(str(self._hash_key).encode('utf-8'))
hash_func.update(str(self._hash_key).encode("utf-8"))
return hash_func.hexdigest()[:8]
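For reference, the same truncated-SHA1 scheme as _get_hash, written as a standalone sketch with an illustrative hash key:

import hashlib

hash_key = (717, 0.7, 0.1)  # e.g. (random_seed, train_size, val_size)
hash_func = hashlib.sha1()
hash_func.update(str(hash_key).encode("utf-8"))
print(hash_func.hexdigest()[:8])  # 8-character suffix used in cached file names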
@property
def url(self):
r"""Get url to download the raw dataset.
"""
r"""Get url to download the raw dataset."""
return self._url
@property
def name(self):
r"""Name of the dataset.
"""
r"""Name of the dataset."""
return self._name
@property
def raw_dir(self):
r"""Raw file directory contains the input data folder.
"""
r"""Raw file directory contains the input data folder."""
return self._raw_dir
@property
def raw_path(self):
r"""Directory contains the input data files.
By default raw_path = os.path.join(self.raw_dir, self.name)
By default raw_path = os.path.join(self.raw_dir, self.name)
"""
return os.path.join(self.raw_dir, self.name)
@property
def save_dir(self):
r"""Directory to save the processed dataset.
"""
r"""Directory to save the processed dataset."""
return self._save_dir
@property
def save_path(self):
r"""Path to save the processed dataset.
"""
r"""Path to save the processed dataset."""
return os.path.join(self._save_dir, self.name)
@property
def verbose(self):
r"""Whether to print information.
"""
r"""Whether to print information."""
return self._verbose
@property
def hash(self):
r"""Hash value for the dataset and the setting.
"""
r"""Hash value for the dataset and the setting."""
return self._hash
@abc.abstractmethod
def __getitem__(self, idx):
r"""Gets the data object at index.
"""
r"""Gets the data object at index."""
pass
@abc.abstractmethod
......@@ -269,8 +274,11 @@ class DGLDataset(object):
pass
def __repr__(self):
return f'Dataset("{self.name}", num_graphs={len(self)},' + \
f' save_path={self.save_path})'
return (
f'Dataset("{self.name}", num_graphs={len(self)},'
+ f" save_path={self.save_path})"
)
class DGLBuiltinDataset(DGLDataset):
r"""The Basic DGL Builtin Dataset.
......@@ -299,21 +307,31 @@ class DGLBuiltinDataset(DGLDataset):
a transformed version. The :class:`~dgl.DGLGraph` object will be
transformed before every access.
"""
def __init__(self, name, url, raw_dir=None, hash_key=(),
force_reload=False, verbose=False, transform=None):
super(DGLBuiltinDataset, self).__init__(name,
url=url,
raw_dir=raw_dir,
save_dir=None,
hash_key=hash_key,
force_reload=force_reload,
verbose=verbose,
transform=transform)
def __init__(
self,
name,
url,
raw_dir=None,
hash_key=(),
force_reload=False,
verbose=False,
transform=None,
):
super(DGLBuiltinDataset, self).__init__(
name,
url=url,
raw_dir=raw_dir,
save_dir=None,
hash_key=hash_key,
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
def download(self):
r""" Automatically download data and extract it.
"""
r"""Automatically download data and extract it."""
if self.url is not None:
zip_file_path = os.path.join(self.raw_dir, self.name + '.zip')
zip_file_path = os.path.join(self.raw_dir, self.name + ".zip")
download(self.url, path=zip_file_path)
extract_archive(zip_file_path, self.raw_path)
import os
import numpy as np
import scipy.sparse as sp
from .dgl_dataset import DGLBuiltinDataset
from .utils import save_graphs, load_graphs, _get_dgl_url
from .utils import save_info, load_info
from ..convert import graph
from .. import backend as F
from ..convert import graph
from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, load_graphs, load_info, save_graphs, save_info
class FakeNewsDataset(DGLBuiltinDataset):
......@@ -113,30 +113,41 @@ class FakeNewsDataset(DGLBuiltinDataset):
>>> labels = dataset.labels
"""
file_urls = {
'gossipcop': 'dataset/FakeNewsGOS.zip',
'politifact': 'dataset/FakeNewsPOL.zip'
"gossipcop": "dataset/FakeNewsGOS.zip",
"politifact": "dataset/FakeNewsPOL.zip",
}
def __init__(self, name, feature_name, raw_dir=None, transform=None):
assert name in ['gossipcop', 'politifact'], \
"Only supports 'gossipcop' or 'politifact'."
assert name in [
"gossipcop",
"politifact",
], "Only supports 'gossipcop' or 'politifact'."
url = _get_dgl_url(self.file_urls[name])
assert feature_name in ['bert', 'content', 'profile', 'spacy'], \
"Only supports 'bert', 'content', 'profile', or 'spacy'"
assert feature_name in [
"bert",
"content",
"profile",
"spacy",
], "Only supports 'bert', 'content', 'profile', or 'spacy'"
self.feature_name = feature_name
super(FakeNewsDataset, self).__init__(name=name,
url=url,
raw_dir=raw_dir,
transform=transform)
super(FakeNewsDataset, self).__init__(
name=name, url=url, raw_dir=raw_dir, transform=transform
)
def process(self):
"""process raw data to graph, labels and masks"""
self.labels = F.tensor(np.load(os.path.join(self.raw_path, 'graph_labels.npy')))
self.labels = F.tensor(
np.load(os.path.join(self.raw_path, "graph_labels.npy"))
)
num_graphs = self.labels.shape[0]
node_graph_id = np.load(os.path.join(self.raw_path, 'node_graph_id.npy'))
edges = np.genfromtxt(os.path.join(self.raw_path, 'A.txt'), delimiter=',', dtype=int)
node_graph_id = np.load(
os.path.join(self.raw_path, "node_graph_id.npy")
)
edges = np.genfromtxt(
os.path.join(self.raw_path, "A.txt"), delimiter=",", dtype=int
)
src = edges[:, 0]
dst = edges[:, 1]
g = graph((src, dst))
......@@ -148,9 +159,9 @@ class FakeNewsDataset(DGLBuiltinDataset):
self.graphs = [g.subgraph(node_idx) for node_idx in node_idx_list]
train_idx = np.load(os.path.join(self.raw_path, 'train_idx.npy'))
val_idx = np.load(os.path.join(self.raw_path, 'val_idx.npy'))
test_idx = np.load(os.path.join(self.raw_path, 'test_idx.npy'))
train_idx = np.load(os.path.join(self.raw_path, "train_idx.npy"))
val_idx = np.load(os.path.join(self.raw_path, "val_idx.npy"))
test_idx = np.load(os.path.join(self.raw_path, "test_idx.npy"))
train_mask = np.zeros(num_graphs, dtype=np.bool)
val_mask = np.zeros(num_graphs, dtype=np.bool)
test_mask = np.zeros(num_graphs, dtype=np.bool)
......@@ -161,40 +172,47 @@ class FakeNewsDataset(DGLBuiltinDataset):
self.val_mask = F.tensor(val_mask)
self.test_mask = F.tensor(test_mask)
feature_file = 'new_' + self.feature_name + '_feature.npz'
self.feature = F.tensor(sp.load_npz(os.path.join(self.raw_path, feature_file)).todense())
feature_file = "new_" + self.feature_name + "_feature.npz"
self.feature = F.tensor(
sp.load_npz(os.path.join(self.raw_path, feature_file)).todense()
)
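A standalone sketch of the same .todense()-to-tensor step, using a random toy sparse matrix in place of the real new_<feature_name>_feature.npz and torch directly in place of the backend F.tensor:

import numpy as np
import scipy.sparse as sp
import torch

feat = sp.random(4, 8, density=0.25, format="csr", random_state=0)  # toy features
dense = torch.as_tensor(np.asarray(feat.todense()))
# dense.shape == torch.Size([4, 8])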
def save(self):
"""save the graph list and the labels"""
graph_path = os.path.join(self.save_path, self.name + '_dgl_graph.bin')
info_path = os.path.join(self.save_path, self.name + '_dgl_graph.pkl')
graph_path = os.path.join(self.save_path, self.name + "_dgl_graph.bin")
info_path = os.path.join(self.save_path, self.name + "_dgl_graph.pkl")
save_graphs(str(graph_path), self.graphs)
save_info(info_path, {'label': self.labels,
'feature': self.feature,
'train_mask': self.train_mask,
'val_mask': self.val_mask,
'test_mask': self.test_mask})
save_info(
info_path,
{
"label": self.labels,
"feature": self.feature,
"train_mask": self.train_mask,
"val_mask": self.val_mask,
"test_mask": self.test_mask,
},
)
def has_cache(self):
""" check whether there are processed data in `self.save_path` """
graph_path = os.path.join(self.save_path, self.name + '_dgl_graph.bin')
info_path = os.path.join(self.save_path, self.name + '_dgl_graph.pkl')
"""check whether there are processed data in `self.save_path`"""
graph_path = os.path.join(self.save_path, self.name + "_dgl_graph.bin")
info_path = os.path.join(self.save_path, self.name + "_dgl_graph.pkl")
return os.path.exists(graph_path) and os.path.exists(info_path)
def load(self):
"""load processed data from directory `self.save_path`"""
graph_path = os.path.join(self.save_path, self.name + '_dgl_graph.bin')
info_path = os.path.join(self.save_path, self.name + '_dgl_graph.pkl')
graph_path = os.path.join(self.save_path, self.name + "_dgl_graph.bin")
info_path = os.path.join(self.save_path, self.name + "_dgl_graph.pkl")
graphs, _ = load_graphs(str(graph_path))
info = load_info(str(info_path))
self.graphs = graphs
self.labels = info['label']
self.feature = info['feature']
self.labels = info["label"]
self.feature = info["feature"]
self.train_mask = info['train_mask']
self.val_mask = info['val_mask']
self.test_mask = info['test_mask']
self.train_mask = info["train_mask"]
self.val_mask = info["val_mask"]
self.test_mask = info["test_mask"]
@property
def num_classes(self):
......@@ -207,7 +225,7 @@ class FakeNewsDataset(DGLBuiltinDataset):
return self.labels.shape[0]
def __getitem__(self, i):
r""" Get graph and label by index
r"""Get graph and label by index
Parameters
----------
......
"""Flickr Dataset"""
import os
import json
import os
import numpy as np
import scipy.sparse as sp
from .. import backend as F
from ..convert import from_scipy
from ..transforms import reorder_graph
from .dgl_dataset import DGLBuiltinDataset
from .utils import generate_mask_tensor, load_graphs, save_graphs, _get_dgl_url
from .utils import _get_dgl_url, generate_mask_tensor, load_graphs, save_graphs
class FlickrDataset(DGLBuiltinDataset):
......@@ -65,66 +67,78 @@ class FlickrDataset(DGLBuiltinDataset):
>>> test_mask = g.ndata['test_mask']
"""
def __init__(self, raw_dir=None, force_reload=False, verbose=False, transform=None,
reorder=False):
_url = _get_dgl_url('dataset/flickr.zip')
def __init__(
self,
raw_dir=None,
force_reload=False,
verbose=False,
transform=None,
reorder=False,
):
_url = _get_dgl_url("dataset/flickr.zip")
self._reorder = reorder
super(FlickrDataset, self).__init__(name='flickr',
raw_dir=raw_dir,
url=_url,
force_reload=force_reload,
verbose=verbose,
transform=transform)
super(FlickrDataset, self).__init__(
name="flickr",
raw_dir=raw_dir,
url=_url,
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
def process(self):
"""process raw data to graph, labels and masks"""
coo_adj = sp.load_npz(os.path.join(self.raw_path, "adj_full.npz"))
g = from_scipy(coo_adj)
features = np.load(os.path.join(self.raw_path, 'feats.npy'))
features = np.load(os.path.join(self.raw_path, "feats.npy"))
features = F.tensor(features, dtype=F.float32)
y = [-1] * features.shape[0]
with open(os.path.join(self.raw_path, 'class_map.json')) as f:
with open(os.path.join(self.raw_path, "class_map.json")) as f:
class_map = json.load(f)
for key, item in class_map.items():
y[int(key)] = item
labels = F.tensor(np.array(y), dtype=F.int64)
with open(os.path.join(self.raw_path, 'role.json')) as f:
with open(os.path.join(self.raw_path, "role.json")) as f:
role = json.load(f)
train_mask = np.zeros(features.shape[0], dtype=bool)
train_mask[role['tr']] = True
train_mask[role["tr"]] = True
val_mask = np.zeros(features.shape[0], dtype=bool)
val_mask[role['va']] = True
val_mask[role["va"]] = True
test_mask = np.zeros(features.shape[0], dtype=bool)
test_mask[role['te']] = True
test_mask[role["te"]] = True
g.ndata['feat'] = features
g.ndata['label'] = labels
g.ndata['train_mask'] = generate_mask_tensor(train_mask)
g.ndata['val_mask'] = generate_mask_tensor(val_mask)
g.ndata['test_mask'] = generate_mask_tensor(test_mask)
g.ndata["feat"] = features
g.ndata["label"] = labels
g.ndata["train_mask"] = generate_mask_tensor(train_mask)
g.ndata["val_mask"] = generate_mask_tensor(val_mask)
g.ndata["test_mask"] = generate_mask_tensor(test_mask)
if self._reorder:
self._graph = reorder_graph(
g, node_permute_algo='rcmk', edge_permute_algo='dst', store_ids=False)
g,
node_permute_algo="rcmk",
edge_permute_algo="dst",
store_ids=False,
)
else:
self._graph = g
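The mask construction in process() above boils down to scattering True into a boolean vector at the indices listed in role.json; a toy sketch:

import numpy as np

num_nodes = 6
role = {"tr": [0, 1, 2], "va": [3], "te": [4, 5]}  # toy stand-in for role.json
train_mask = np.zeros(num_nodes, dtype=bool)
train_mask[role["tr"]] = True
# train_mask -> [ True  True  True False False False]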
def has_cache(self):
graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
graph_path = os.path.join(self.save_path, "dgl_graph.bin")
return os.path.exists(graph_path)
def save(self):
graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
graph_path = os.path.join(self.save_path, "dgl_graph.bin")
save_graphs(graph_path, self._graph)
def load(self):
graph_path = os.path.join(self.save_path, 'dgl_graph.bin')
graph_path = os.path.join(self.save_path, "dgl_graph.bin")
g, _ = load_graphs(graph_path)
self._graph = g[0]
......@@ -137,7 +151,7 @@ class FlickrDataset(DGLBuiltinDataset):
return 1
def __getitem__(self, idx):
r""" Get graph object
r"""Get graph object
Parameters
----------
......@@ -161,4 +175,4 @@ class FlickrDataset(DGLBuiltinDataset):
if self._transform is None:
return self._graph
else:
return self._transform(self._graph)
\ No newline at end of file
return self._transform(self._graph)
"""Fraud Dataset
"""
import os
from scipy import io
import numpy as np
from scipy import io
from .utils import save_graphs, load_graphs, _get_dgl_url
from .. import backend as F
from ..convert import heterograph
from .dgl_dataset import DGLBuiltinDataset
from .. import backend as F
from .utils import _get_dgl_url, load_graphs, save_graphs
class FraudDataset(DGLBuiltinDataset):
......@@ -77,61 +78,74 @@ class FraudDataset(DGLBuiltinDataset):
>>> label = graph.ndata['label']
"""
file_urls = {
'yelp': 'dataset/FraudYelp.zip',
'amazon': 'dataset/FraudAmazon.zip'
"yelp": "dataset/FraudYelp.zip",
"amazon": "dataset/FraudAmazon.zip",
}
relations = {
'yelp': ['net_rsr', 'net_rtr', 'net_rur'],
'amazon': ['net_upu', 'net_usu', 'net_uvu']
}
file_names = {
'yelp': 'YelpChi.mat',
'amazon': 'Amazon.mat'
"yelp": ["net_rsr", "net_rtr", "net_rur"],
"amazon": ["net_upu", "net_usu", "net_uvu"],
}
node_name = {
'yelp': 'review',
'amazon': 'user'
}
def __init__(self, name, raw_dir=None, random_seed=717, train_size=0.7,
val_size=0.1, force_reload=False, verbose=True, transform=None):
assert name in ['yelp', 'amazon'], "only supports 'yelp', or 'amazon'"
file_names = {"yelp": "YelpChi.mat", "amazon": "Amazon.mat"}
node_name = {"yelp": "review", "amazon": "user"}
def __init__(
self,
name,
raw_dir=None,
random_seed=717,
train_size=0.7,
val_size=0.1,
force_reload=False,
verbose=True,
transform=None,
):
assert name in ["yelp", "amazon"], "only supports 'yelp', or 'amazon'"
url = _get_dgl_url(self.file_urls[name])
self.seed = random_seed
self.train_size = train_size
self.val_size = val_size
super(FraudDataset, self).__init__(name=name,
url=url,
raw_dir=raw_dir,
hash_key=(random_seed, train_size, val_size),
force_reload=force_reload,
verbose=verbose,
transform=transform)
super(FraudDataset, self).__init__(
name=name,
url=url,
raw_dir=raw_dir,
hash_key=(random_seed, train_size, val_size),
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
def process(self):
"""process raw data to graph, labels, splitting masks"""
file_path = os.path.join(self.raw_path, self.file_names[self.name])
data = io.loadmat(file_path)
node_features = data['features'].todense()
node_features = data["features"].todense()
# remove additional dimension of length 1 in raw .mat file
node_labels = data['label'].squeeze()
node_labels = data["label"].squeeze()
graph_data = {}
for relation in self.relations[self.name]:
adj = data[relation].tocoo()
row, col = adj.row, adj.col
graph_data[(self.node_name[self.name], relation, self.node_name[self.name])] = (row, col)
graph_data[
(self.node_name[self.name], relation, self.node_name[self.name])
] = (row, col)
g = heterograph(graph_data)
g.ndata['feature'] = F.tensor(node_features, dtype=F.data_type_dict['float32'])
g.ndata['label'] = F.tensor(node_labels, dtype=F.data_type_dict['int64'])
g.ndata["feature"] = F.tensor(
node_features, dtype=F.data_type_dict["float32"]
)
g.ndata["label"] = F.tensor(
node_labels, dtype=F.data_type_dict["int64"]
)
self.graph = g
self._random_split(g.ndata['feature'], self.seed, self.train_size, self.val_size)
self._random_split(
g.ndata["feature"], self.seed, self.train_size, self.val_size
)
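A standalone sketch (toy adjacency, single relation) of the per-relation heterograph construction in process(); "net_upu" is one of the 'amazon' relation names listed above, and the random matrix stands in for the .mat contents.

import dgl
import scipy.sparse as sp
import torch

adj = sp.random(5, 5, density=0.3, format="coo", random_state=0)  # toy relation adjacency
graph_data = {
    ("user", "net_upu", "user"): (
        torch.as_tensor(adj.row, dtype=torch.int64),
        torch.as_tensor(adj.col, dtype=torch.int64),
    )
}
g = dgl.heterograph(graph_data, num_nodes_dict={"user": 5})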
def __getitem__(self, idx):
r""" Get graph object
r"""Get graph object
Parameters
----------
......@@ -171,51 +185,61 @@ class FraudDataset(DGLBuiltinDataset):
def save(self):
"""save processed data to directory `self.save_path`"""
graph_path = os.path.join(self.save_path, self.name + '_dgl_graph_{}.bin'.format(self.hash))
graph_path = os.path.join(
self.save_path, self.name + "_dgl_graph_{}.bin".format(self.hash)
)
save_graphs(str(graph_path), self.graph)
def load(self):
"""load processed data from directory `self.save_path`"""
graph_path = os.path.join(self.save_path, self.name + '_dgl_graph_{}.bin'.format(self.hash))
graph_path = os.path.join(
self.save_path, self.name + "_dgl_graph_{}.bin".format(self.hash)
)
graph_list, _ = load_graphs(str(graph_path))
g = graph_list[0]
self.graph = g
def has_cache(self):
"""check whether there are processed data in `self.save_path`"""
graph_path = os.path.join(self.save_path, self.name + '_dgl_graph_{}.bin'.format(self.hash))
graph_path = os.path.join(
self.save_path, self.name + "_dgl_graph_{}.bin".format(self.hash)
)
return os.path.exists(graph_path)
def _random_split(self, x, seed=717, train_size=0.7, val_size=0.1):
"""split the dataset into training set, validation set and testing set"""
assert 0 <= train_size + val_size <= 1, \
"The sum of valid training set size and validation set size " \
assert 0 <= train_size + val_size <= 1, (
"The sum of valid training set size and validation set size "
"must between 0 and 1 (inclusive)."
)
N = x.shape[0]
index = np.arange(N)
if self.name == 'amazon':
if self.name == "amazon":
# 0-3304 are unlabeled nodes
index = np.arange(3305, N)
index = np.random.RandomState(seed).permutation(index)
train_idx = index[:int(train_size * len(index))]
val_idx = index[len(index) - int(val_size * len(index)):]
test_idx = index[int(train_size * len(index)):len(index) - int(val_size * len(index))]
train_idx = index[: int(train_size * len(index))]
val_idx = index[len(index) - int(val_size * len(index)) :]
test_idx = index[
int(train_size * len(index)) : len(index)
- int(val_size * len(index))
]
train_mask = np.zeros(N, dtype=np.bool)
val_mask = np.zeros(N, dtype=np.bool)
test_mask = np.zeros(N, dtype=np.bool)
train_mask[train_idx] = True
val_mask[val_idx] = True
test_mask[test_idx] = True
self.graph.ndata['train_mask'] = F.tensor(train_mask)
self.graph.ndata['val_mask'] = F.tensor(val_mask)
self.graph.ndata['test_mask'] = F.tensor(test_mask)
self.graph.ndata["train_mask"] = F.tensor(train_mask)
self.graph.ndata["val_mask"] = F.tensor(val_mask)
self.graph.ndata["test_mask"] = F.tensor(test_mask)
class FraudYelpDataset(FraudDataset):
r""" Fraud Yelp Dataset
r"""Fraud Yelp Dataset
The Yelp dataset includes hotel and restaurant reviews filtered (spam) and recommended
(legitimate) by Yelp. A spam review detection task can be conducted, which is a binary
......@@ -278,20 +302,30 @@ class FraudYelpDataset(FraudDataset):
>>> label = graph.ndata['label']
"""
def __init__(self, raw_dir=None, random_seed=717, train_size=0.7,
val_size=0.1, force_reload=False, verbose=True, transform=None):
super(FraudYelpDataset, self).__init__(name='yelp',
raw_dir=raw_dir,
random_seed=random_seed,
train_size=train_size,
val_size=val_size,
force_reload=force_reload,
verbose=verbose,
transform=transform)
def __init__(
self,
raw_dir=None,
random_seed=717,
train_size=0.7,
val_size=0.1,
force_reload=False,
verbose=True,
transform=None,
):
super(FraudYelpDataset, self).__init__(
name="yelp",
raw_dir=raw_dir,
random_seed=random_seed,
train_size=train_size,
val_size=val_size,
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
class FraudAmazonDataset(FraudDataset):
r""" Fraud Amazon Dataset
r"""Fraud Amazon Dataset
The Amazon dataset includes product reviews under the Musical Instruments category.
Users with more than 80% helpful votes are labelled as benign entities and users with
......@@ -359,13 +393,23 @@ class FraudAmazonDataset(FraudDataset):
>>> label = graph.ndata['label']
"""
def __init__(self, raw_dir=None, random_seed=717, train_size=0.7,
val_size=0.1, force_reload=False, verbose=True, transform=None):
super(FraudAmazonDataset, self).__init__(name='amazon',
raw_dir=raw_dir,
random_seed=random_seed,
train_size=train_size,
val_size=val_size,
force_reload=force_reload,
verbose=verbose,
transform=transform)
def __init__(
self,
raw_dir=None,
random_seed=717,
train_size=0.7,
val_size=0.1,
force_reload=False,
verbose=True,
transform=None,
):
super(FraudAmazonDataset, self).__init__(
name="amazon",
raw_dir=raw_dir,
random_seed=random_seed,
train_size=train_size,
val_size=val_size,
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
""" GDELT dataset for temporal graph """
import numpy as np
import os
from .dgl_dataset import DGLBuiltinDataset
from .utils import loadtxt, save_info, load_info, _get_dgl_url
from ..convert import graph as dgl_graph
import numpy as np
from .. import backend as F
from ..convert import graph as dgl_graph
from .dgl_dataset import DGLBuiltinDataset
from .utils import _get_dgl_url, load_info, loadtxt, save_info
class GDELTDataset(DGLBuiltinDataset):
......@@ -69,23 +70,32 @@ class GDELTDataset(DGLBuiltinDataset):
....
>>>
"""
def __init__(self, mode='train', raw_dir=None,
force_reload=False, verbose=False, transform=None):
def __init__(
self,
mode="train",
raw_dir=None,
force_reload=False,
verbose=False,
transform=None,
):
mode = mode.lower()
assert mode in ['train', 'valid', 'test'], "Mode not valid."
assert mode in ["train", "valid", "test"], "Mode not valid."
self.mode = mode
self.num_nodes = 23033
_url = _get_dgl_url('dataset/gdelt.zip')
super(GDELTDataset, self).__init__(name='GDELT',
url=_url,
raw_dir=raw_dir,
force_reload=force_reload,
verbose=verbose,
transform=transform)
_url = _get_dgl_url("dataset/gdelt.zip")
super(GDELTDataset, self).__init__(
name="GDELT",
url=_url,
raw_dir=raw_dir,
force_reload=force_reload,
verbose=verbose,
transform=transform,
)
def process(self):
file_path = os.path.join(self.raw_path, self.mode + '.txt')
self.data = loadtxt(file_path, delimiter='\t').astype(np.int64)
file_path = os.path.join(self.raw_path, self.mode + ".txt")
self.data = loadtxt(file_path, delimiter="\t").astype(np.int64)
# The source code is not released, but the paper indicates there are
# 137 samples in total. The cutoff below yields exactly 137 samples.
......@@ -94,25 +104,34 @@ class GDELTDataset(DGLBuiltinDataset):
self._end_time = self.time_index.max()
def has_cache(self):
info_path = os.path.join(self.save_path, self.mode + '_info.pkl')
info_path = os.path.join(self.save_path, self.mode + "_info.pkl")
return os.path.exists(info_path)
def save(self):
info_path = os.path.join(self.save_path, self.mode + '_info.pkl')
save_info(info_path, {'data': self.data,
'time_index': self.time_index,
'start_time': self.start_time,
'end_time': self.end_time})
info_path = os.path.join(self.save_path, self.mode + "_info.pkl")
save_info(
info_path,
{
"data": self.data,
"time_index": self.time_index,
"start_time": self.start_time,
"end_time": self.end_time,
},
)
def load(self):
info_path = os.path.join(self.save_path, self.mode + '_info.pkl')
info_path = os.path.join(self.save_path, self.mode + "_info.pkl")
info = load_info(info_path)
self.data, self.time_index, self._start_time, self._end_time = \
info['data'], info['time_index'], info['start_time'], info['end_time']
self.data, self.time_index, self._start_time, self._end_time = (
info["data"],
info["time_index"],
info["start_time"],
info["end_time"],
)
@property
def start_time(self):
r""" Start time of events in the temporal graph
r"""Start time of events in the temporal graph
Returns
-------
......@@ -122,7 +141,7 @@ class GDELTDataset(DGLBuiltinDataset):
@property
def end_time(self):
r""" End time of events in the temporal graph
r"""End time of events in the temporal graph
Returns
-------
......@@ -131,7 +150,7 @@ class GDELTDataset(DGLBuiltinDataset):
return self._end_time
def __getitem__(self, t):
r""" Get graph by with events before time `t + self.start_time`
r"""Get graph by with events before time `t + self.start_time`
Parameters
----------
......@@ -153,7 +172,9 @@ class GDELTDataset(DGLBuiltinDataset):
edges = self.data[row_mask][:, [0, 2]]
rate = self.data[row_mask][:, 1]
g = dgl_graph((edges[:, 0], edges[:, 1]))
g.edata['rel_type'] = F.tensor(rate.reshape(-1, 1), dtype=F.data_type_dict['int64'])
g.edata["rel_type"] = F.tensor(
rate.reshape(-1, 1), dtype=F.data_type_dict["int64"]
)
if self._transform is not None:
g = self._transform(g)
return g
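A standalone sketch (toy quadruples standing in for rows of the mode's .txt file; the row_mask computation sits in the collapsed hunk, so the time column and the <= comparison are assumptions) of slicing events up to time t and attaching the relation type as edge data:

import dgl
import numpy as np
import torch

data = np.array([[0, 5, 1, 0], [1, 7, 2, 15], [2, 3, 0, 30]], dtype=np.int64)
t = 15
row_mask = data[:, 3] <= t                      # assumed: keep events up to time t
edges = data[row_mask][:, [0, 2]]
rate = data[row_mask][:, 1]
g = dgl.graph((torch.as_tensor(edges[:, 0]), torch.as_tensor(edges[:, 1])))
g.edata["rel_type"] = torch.as_tensor(rate.reshape(-1, 1))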
......@@ -169,7 +190,7 @@ class GDELTDataset(DGLBuiltinDataset):
@property
def is_temporal(self):
r""" Does the dataset contain temporal graphs
r"""Does the dataset contain temporal graphs
Returns
-------
......