Unverified Commit 74c9d27d authored by Hongzhi (Steve), Chen's avatar Hongzhi (Steve), Chen Committed by GitHub
Browse files

[Misc] Auto-format tests. (#5313)



* [Misc] Auto-format tests.

* more

---------
Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-28-63.ap-northeast-1.compute.internal>
parent 86193c26
import unittest
import backend as F
import numpy as np
import dgl
import dgl.ndarray as nd
import numpy as np
@unittest.skipIf(
......
......@@ -735,11 +735,7 @@ def _test_DefaultDataParser():
# string consists of non-numeric values
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame(
{
"label": ["a", "b", "c"],
}
)
df = pd.DataFrame({"label": ["a", "b", "c"]})
df.to_csv(csv_path, index=False)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
......@@ -752,11 +748,7 @@ def _test_DefaultDataParser():
# csv has index column which is ignored as it's unnamed
with tempfile.TemporaryDirectory() as test_dir:
csv_path = os.path.join(test_dir, "nodes.csv")
df = pd.DataFrame(
{
"label": [1, 2, 3],
}
)
df = pd.DataFrame({"label": [1, 2, 3]})
df.to_csv(csv_path)
dp = DefaultDataParser()
df = pd.read_csv(csv_path)
......@@ -1042,9 +1034,7 @@ def _test_load_edge_data_from_csv():
# required headers are missing
df = pd.DataFrame(
{
"src_id": np.random.randint(num_nodes, size=num_edges),
}
{"src_id": np.random.randint(num_nodes, size=num_edges)}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
......@@ -1056,9 +1046,7 @@ def _test_load_edge_data_from_csv():
expect_except = True
assert expect_except
df = pd.DataFrame(
{
"dst_id": np.random.randint(num_nodes, size=num_edges),
}
{"dst_id": np.random.randint(num_nodes, size=num_edges)}
)
csv_path = os.path.join(test_dir, "edges.csv")
df.to_csv(csv_path, index=False)
......
......@@ -4,12 +4,12 @@ import time
import unittest
import backend as F
import numpy as np
import pytest
import scipy as sp
import dgl
import dgl.ndarray as nd
import numpy as np
import pytest
import scipy as sp
from dgl import DGLGraph
from dgl.data.utils import load_labels, load_tensors, save_tensors
......
import unittest
import backend as F
from test_utils import parametrize_idtype
import dgl
from dgl.dataloading import (
NeighborSampler,
as_edge_prediction_sampler,
negative_sampler,
NeighborSampler,
)
from test_utils import parametrize_idtype
def create_test_graph(idtype):
......
import unittest
from collections import defaultdict as ddict
import backend as F
import dgl
import networkx as nx
import numpy as np
import scipy.sparse as ssp
import networkx as nx
from dgl import DGLGraph
from collections import defaultdict as ddict
import unittest
from test_utils import parametrize_idtype
D = 5
reduce_msg_shapes = set()
def message_func(edges):
assert F.ndim(edges.src['h']) == 2
assert F.shape(edges.src['h'])[1] == D
return {'m' : edges.src['h']}
assert F.ndim(edges.src["h"]) == 2
assert F.shape(edges.src["h"])[1] == D
return {"m": edges.src["h"]}
def reduce_func(nodes):
msgs = nodes.mailbox['m']
msgs = nodes.mailbox["m"]
reduce_msg_shapes.add(tuple(msgs.shape))
assert F.ndim(msgs) == 3
assert F.shape(msgs)[2] == D
return {'accum' : F.sum(msgs, 1)}
return {"accum": F.sum(msgs, 1)}
def apply_node_func(nodes):
return {'h' : nodes.data['h'] + nodes.data['accum']}
return {"h": nodes.data["h"] + nodes.data["accum"]}
def generate_graph_old(grad=False):
g = DGLGraph()
g.add_nodes(10) # 10 nodes
g.add_nodes(10) # 10 nodes
# create a graph where 0 is the source and 9 is the sink
# 17 edges
for i in range(1, 9):
......@@ -43,14 +49,15 @@ def generate_graph_old(grad=False):
ncol = F.attach_grad(ncol)
ecol = F.attach_grad(ecol)
g.ndata['h'] = ncol
g.edata['w'] = ecol
g.ndata["h"] = ncol
g.edata["w"] = ecol
g.set_n_initializer(dgl.init.zero_initializer)
g.set_e_initializer(dgl.init.zero_initializer)
return g
def generate_graph(idtype, grad=False):
'''
"""
s, d, eid
0, 1, 0
1, 9, 1
......@@ -69,7 +76,7 @@ def generate_graph(idtype, grad=False):
0, 8, 14
8, 9, 15
9, 0, 16
'''
"""
u = F.tensor([0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0, 8, 9])
v = F.tensor([1, 9, 2, 9, 3, 9, 4, 9, 5, 9, 6, 9, 7, 9, 8, 9, 0])
g = dgl.graph((u, v), idtype=idtype)
......@@ -80,37 +87,51 @@ def generate_graph(idtype, grad=False):
ncol = F.attach_grad(ncol)
ecol = F.attach_grad(ecol)
g.ndata['h'] = ncol
g.edata['w'] = ecol
g.ndata["h"] = ncol
g.edata["w"] = ecol
g.set_n_initializer(dgl.init.zero_initializer)
g.set_e_initializer(dgl.init.zero_initializer)
return g
def test_compatible():
g = generate_graph_old()
@parametrize_idtype
def test_batch_setter_getter(idtype):
def _pfc(x):
return list(F.zerocopy_to_numpy(x)[:,0])
return list(F.zerocopy_to_numpy(x)[:, 0])
g = generate_graph(idtype)
# set all nodes
g.ndata['h'] = F.zeros((10, D))
assert F.allclose(g.ndata['h'], F.zeros((10, D)))
g.ndata["h"] = F.zeros((10, D))
assert F.allclose(g.ndata["h"], F.zeros((10, D)))
# pop nodes
old_len = len(g.ndata)
g.ndata.pop('h')
g.ndata.pop("h")
assert len(g.ndata) == old_len - 1
g.ndata['h'] = F.zeros((10, D))
g.ndata["h"] = F.zeros((10, D))
# set partial nodes
u = F.tensor([1, 3, 5], g.idtype)
g.nodes[u].data['h'] = F.ones((3, D))
assert _pfc(g.ndata['h']) == [0., 1., 0., 1., 0., 1., 0., 0., 0., 0.]
g.nodes[u].data["h"] = F.ones((3, D))
assert _pfc(g.ndata["h"]) == [
0.0,
1.0,
0.0,
1.0,
0.0,
1.0,
0.0,
0.0,
0.0,
0.0,
]
# get partial nodes
u = F.tensor([1, 2, 3], g.idtype)
assert _pfc(g.nodes[u].data['h']) == [1., 0., 1.]
assert _pfc(g.nodes[u].data["h"]) == [1.0, 0.0, 1.0]
'''
"""
s, d, eid
0, 1, 0
1, 9, 1
......@@ -129,49 +150,54 @@ def test_batch_setter_getter(idtype):
0, 8, 14
8, 9, 15
9, 0, 16
'''
"""
# set all edges
g.edata['l'] = F.zeros((17, D))
assert _pfc(g.edata['l']) == [0.] * 17
g.edata["l"] = F.zeros((17, D))
assert _pfc(g.edata["l"]) == [0.0] * 17
# pop edges
old_len = len(g.edata)
g.edata.pop('l')
g.edata.pop("l")
assert len(g.edata) == old_len - 1
g.edata['l'] = F.zeros((17, D))
g.edata["l"] = F.zeros((17, D))
# set partial edges (many-many)
u = F.tensor([0, 0, 2, 5, 9], g.idtype)
v = F.tensor([1, 3, 9, 9, 0], g.idtype)
g.edges[u, v].data['l'] = F.ones((5, D))
truth = [0.] * 17
truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1.
assert _pfc(g.edata['l']) == truth
g.edges[u, v].data["l"] = F.ones((5, D))
truth = [0.0] * 17
truth[0] = truth[4] = truth[3] = truth[9] = truth[16] = 1.0
assert _pfc(g.edata["l"]) == truth
u = F.tensor([3, 4, 6], g.idtype)
v = F.tensor([9, 9, 9], g.idtype)
g.edges[u, v].data['l'] = F.ones((3, D))
truth[5] = truth[7] = truth[11] = 1.
assert _pfc(g.edata['l']) == truth
g.edges[u, v].data["l"] = F.ones((3, D))
truth[5] = truth[7] = truth[11] = 1.0
assert _pfc(g.edata["l"]) == truth
u = F.tensor([0, 0, 0], g.idtype)
v = F.tensor([4, 5, 6], g.idtype)
g.edges[u, v].data['l'] = F.ones((3, D))
truth[6] = truth[8] = truth[10] = 1.
assert _pfc(g.edata['l']) == truth
g.edges[u, v].data["l"] = F.ones((3, D))
truth[6] = truth[8] = truth[10] = 1.0
assert _pfc(g.edata["l"]) == truth
u = F.tensor([0, 6, 0], g.idtype)
v = F.tensor([6, 9, 7], g.idtype)
assert _pfc(g.edges[u, v].data['l']) == [1.0, 1.0, 0.0]
assert _pfc(g.edges[u, v].data["l"]) == [1.0, 1.0, 0.0]
@parametrize_idtype
def test_batch_setter_autograd(idtype):
g = generate_graph(idtype, grad=True)
h1 = g.ndata['h']
h1 = g.ndata["h"]
# partial set
v = F.tensor([1, 2, 8], g.idtype)
hh = F.attach_grad(F.zeros((len(v), D)))
with F.record_grad():
g.nodes[v].data['h'] = hh
h2 = g.ndata['h']
g.nodes[v].data["h"] = hh
h2 = g.ndata["h"]
F.backward(h2, F.ones((10, D)) * 2)
assert F.array_equal(F.grad(h1)[:,0], F.tensor([2., 0., 0., 2., 2., 2., 2., 2., 0., 2.]))
assert F.array_equal(F.grad(hh)[:,0], F.tensor([2., 2., 2.]))
assert F.array_equal(
F.grad(h1)[:, 0],
F.tensor([2.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 0.0, 2.0]),
)
assert F.array_equal(F.grad(hh)[:, 0], F.tensor([2.0, 2.0, 2.0]))
def _test_nx_conversion():
# check conversion between networkx and DGLGraph
......@@ -195,8 +221,8 @@ def _test_nx_conversion():
if num_edges > 0:
edge_feat = ddict(lambda: [0] * num_edges)
for u, v, attr in nxg.edges(data=True):
assert len(attr) == len(ef) + 1 # extra id
eid = attr['id']
assert len(attr) == len(ef) + 1 # extra id
eid = attr["id"]
for k in ef:
edge_feat[k][eid] = F.unsqueeze(attr[k], 0)
for k in edge_feat:
......@@ -211,18 +237,18 @@ def _test_nx_conversion():
e1 = F.randn((4, 5))
e2 = F.randn((4, 7))
g = dgl.graph(([0, 1, 3, 4], [2, 4, 0, 3]))
g.ndata.update({'n1': n1, 'n2': n2, 'n3': n3})
g.edata.update({'e1': e1, 'e2': e2})
g.ndata.update({"n1": n1, "n2": n2, "n3": n3})
g.edata.update({"e1": e1, "e2": e2})
# convert to networkx
nxg = g.to_networkx(node_attrs=['n1', 'n3'], edge_attrs=['e1', 'e2'])
nxg = g.to_networkx(node_attrs=["n1", "n3"], edge_attrs=["e1", "e2"])
assert len(nxg) == 5
assert nxg.size() == 4
_check_nx_feature(nxg, {'n1': n1, 'n3': n3}, {'e1': e1, 'e2': e2})
_check_nx_feature(nxg, {"n1": n1, "n3": n3}, {"e1": e1, "e2": e2})
# convert to DGLGraph, nx graph has id in edge feature
# use id feature to test non-tensor copy
g = dgl.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1', 'id'])
g = dgl.from_networkx(nxg, node_attrs=["n1"], edge_attrs=["e1", "id"])
# check graph size
assert g.number_of_nodes() == 5
assert g.number_of_edges() == 4
......@@ -231,32 +257,34 @@ def _test_nx_conversion():
assert len(g.ndata) == 1
assert len(g.edata) == 2
# check feature values
assert F.allclose(g.ndata['n1'], n1)
assert F.allclose(g.ndata["n1"], n1)
# with id in nx edge feature, e1 should follow original order
assert F.allclose(g.edata['e1'], e1)
assert F.array_equal(F.astype(g.edata['id'], F.int64), F.copy_to(F.arange(0, 4), F.cpu()))
assert F.allclose(g.edata["e1"], e1)
assert F.array_equal(
F.astype(g.edata["id"], F.int64), F.copy_to(F.arange(0, 4), F.cpu())
)
# test conversion after modifying DGLGraph
g.edata.pop('id') # pop id so we don't need to provide id when adding edges
g.edata.pop("id") # pop id so we don't need to provide id when adding edges
new_n = F.randn((2, 3))
new_e = F.randn((3, 5))
g.add_nodes(2, data={'n1': new_n})
g.add_nodes(2, data={"n1": new_n})
# add three edges, one is a multi-edge
g.add_edges([3, 6, 0], [4, 5, 2], data={'e1': new_e})
g.add_edges([3, 6, 0], [4, 5, 2], data={"e1": new_e})
n1 = F.cat((n1, new_n), 0)
e1 = F.cat((e1, new_e), 0)
# convert to networkx again
nxg = g.to_networkx(node_attrs=['n1'], edge_attrs=['e1'])
nxg = g.to_networkx(node_attrs=["n1"], edge_attrs=["e1"])
assert len(nxg) == 7
assert nxg.size() == 7
_check_nx_feature(nxg, {'n1': n1}, {'e1': e1})
_check_nx_feature(nxg, {"n1": n1}, {"e1": e1})
# now test convert from networkx without id in edge feature
# first pop id in edge feature
for _, _, attr in nxg.edges(data=True):
attr.pop('id')
attr.pop("id")
# test with a new graph
g = dgl.from_networkx(nxg, node_attrs=['n1'], edge_attrs=['e1'])
g = dgl.from_networkx(nxg, node_attrs=["n1"], edge_attrs=["e1"])
# check graph size
assert g.number_of_nodes() == 7
assert g.number_of_edges() == 7
......@@ -264,57 +292,63 @@ def _test_nx_conversion():
assert len(g.ndata) == 1
assert len(g.edata) == 1
# check feature values
assert F.allclose(g.ndata['n1'], n1)
assert F.allclose(g.ndata["n1"], n1)
# edge feature order follows nxg.edges()
edge_feat = []
for _, _, attr in nxg.edges(data=True):
edge_feat.append(F.unsqueeze(attr['e1'], 0))
edge_feat.append(F.unsqueeze(attr["e1"], 0))
edge_feat = F.cat(edge_feat, 0)
assert F.allclose(g.edata['e1'], edge_feat)
assert F.allclose(g.edata["e1"], edge_feat)
# Test converting from a networkx graph whose nodes are
# not labeled with consecutive-integers.
nxg = nx.cycle_graph(5)
nxg.remove_nodes_from([0, 4])
for u in nxg.nodes():
nxg.nodes[u]['h'] = F.tensor([u])
nxg.nodes[u]["h"] = F.tensor([u])
for u, v, d in nxg.edges(data=True):
d['h'] = F.tensor([u, v])
d["h"] = F.tensor([u, v])
g = dgl.from_networkx(nxg, node_attrs=['h'], edge_attrs=['h'])
g = dgl.from_networkx(nxg, node_attrs=["h"], edge_attrs=["h"])
assert g.number_of_nodes() == 3
assert g.number_of_edges() == 4
assert g.has_edge_between(0, 1)
assert g.has_edge_between(1, 2)
assert F.allclose(g.ndata['h'], F.tensor([[1.], [2.], [3.]]))
assert F.allclose(g.edata['h'], F.tensor([[1., 2.], [1., 2.],
[2., 3.], [2., 3.]]))
assert F.allclose(g.ndata["h"], F.tensor([[1.0], [2.0], [3.0]]))
assert F.allclose(
g.edata["h"], F.tensor([[1.0, 2.0], [1.0, 2.0], [2.0, 3.0], [2.0, 3.0]])
)
@parametrize_idtype
def test_apply_nodes(idtype):
def _upd(nodes):
return {'h' : nodes.data['h'] * 2}
return {"h": nodes.data["h"] * 2}
g = generate_graph(idtype)
old = g.ndata['h']
old = g.ndata["h"]
g.apply_nodes(_upd)
assert F.allclose(old * 2, g.ndata['h'])
assert F.allclose(old * 2, g.ndata["h"])
u = F.tensor([0, 3, 4, 6], g.idtype)
g.apply_nodes(lambda nodes : {'h' : nodes.data['h'] * 0.}, u)
assert F.allclose(F.gather_row(g.ndata['h'], u), F.zeros((4, D)))
g.apply_nodes(lambda nodes: {"h": nodes.data["h"] * 0.0}, u)
assert F.allclose(F.gather_row(g.ndata["h"], u), F.zeros((4, D)))
@parametrize_idtype
def test_apply_edges(idtype):
def _upd(edges):
return {'w' : edges.data['w'] * 2}
return {"w": edges.data["w"] * 2}
g = generate_graph(idtype)
old = g.edata['w']
old = g.edata["w"]
g.apply_edges(_upd)
assert F.allclose(old * 2, g.edata['w'])
assert F.allclose(old * 2, g.edata["w"])
u = F.tensor([0, 0, 0, 4, 5, 6], g.idtype)
v = F.tensor([1, 2, 3, 9, 9, 9], g.idtype)
g.apply_edges(lambda edges : {'w' : edges.data['w'] * 0.}, (u, v))
g.apply_edges(lambda edges: {"w": edges.data["w"] * 0.0}, (u, v))
eid = F.tensor(g.edge_ids(u, v))
assert F.allclose(F.gather_row(g.edata['w'], eid), F.zeros((6, D)))
assert F.allclose(F.gather_row(g.edata["w"], eid), F.zeros((6, D)))
@parametrize_idtype
def test_update_routines(idtype):
......@@ -325,7 +359,7 @@ def test_update_routines(idtype):
u = [0, 0, 0, 4, 5, 6]
v = [1, 2, 3, 9, 9, 9]
g.send_and_recv((u, v), message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 3, D), (3, 1, D)})
assert reduce_msg_shapes == {(1, 3, D), (3, 1, D)}
reduce_msg_shapes.clear()
try:
g.send_and_recv([u, v])
......@@ -337,70 +371,82 @@ def test_update_routines(idtype):
v = F.tensor([1, 2, 3, 9], g.idtype)
reduce_msg_shapes.clear()
g.pull(v, message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 8, D), (3, 1, D)})
assert reduce_msg_shapes == {(1, 8, D), (3, 1, D)}
reduce_msg_shapes.clear()
# push
v = F.tensor([0, 1, 2, 3], g.idtype)
reduce_msg_shapes.clear()
g.push(v, message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 3, D), (8, 1, D)})
assert reduce_msg_shapes == {(1, 3, D), (8, 1, D)}
reduce_msg_shapes.clear()
# update_all
reduce_msg_shapes.clear()
g.update_all(message_func, reduce_func, apply_node_func)
assert(reduce_msg_shapes == {(1, 8, D), (9, 1, D)})
assert reduce_msg_shapes == {(1, 8, D), (9, 1, D)}
reduce_msg_shapes.clear()
@parametrize_idtype
def test_update_all_0deg(idtype):
# test#1
g = dgl.graph(([1, 2, 3, 4], [0, 0, 0, 0]), idtype=idtype, device=F.ctx())
def _message(edges):
return {'m' : edges.src['h']}
return {"m": edges.src["h"]}
def _reduce(nodes):
return {'x' : nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}
return {"x": nodes.data["h"] + F.sum(nodes.mailbox["m"], 1)}
def _apply(nodes):
return {'x' : nodes.data['x'] * 2}
return {"x": nodes.data["x"] * 2}
def _init2(shape, dtype, ctx, ids):
return 2 + F.zeros(shape, dtype, ctx)
g.set_n_initializer(_init2, 'x')
g.set_n_initializer(_init2, "x")
old_repr = F.randn((5, 5))
g.ndata['h'] = old_repr
g.ndata["h"] = old_repr
g.update_all(_message, _reduce, _apply)
new_repr = g.ndata['x']
new_repr = g.ndata["x"]
# the first row of the new_repr should be the sum of all the node
# features; while the 0-deg nodes should be initialized by the
# initializer and applied with UDF.
assert F.allclose(new_repr[1:], 2*(2+F.zeros((4,5))))
assert F.allclose(new_repr[1:], 2 * (2 + F.zeros((4, 5))))
assert F.allclose(new_repr[0], 2 * F.sum(old_repr, 0))
# test#2: graph with no edge
g = dgl.graph(([], []), num_nodes=5, idtype=idtype, device=F.ctx())
g.ndata['h'] = old_repr
g.update_all(_message, _reduce, lambda nodes : {'h' : nodes.data['h'] * 2})
new_repr = g.ndata['h']
g.ndata["h"] = old_repr
g.update_all(_message, _reduce, lambda nodes: {"h": nodes.data["h"] * 2})
new_repr = g.ndata["h"]
# should fallback to apply
assert F.allclose(new_repr, 2*old_repr)
assert F.allclose(new_repr, 2 * old_repr)
@parametrize_idtype
def test_pull_0deg(idtype):
g = dgl.graph(([0], [1]), idtype=idtype, device=F.ctx())
def _message(edges):
return {'m' : edges.src['h']}
return {"m": edges.src["h"]}
def _reduce(nodes):
return {'x' : nodes.data['h'] + F.sum(nodes.mailbox['m'], 1)}
return {"x": nodes.data["h"] + F.sum(nodes.mailbox["m"], 1)}
def _apply(nodes):
return {'x' : nodes.data['x'] * 2}
return {"x": nodes.data["x"] * 2}
def _init2(shape, dtype, ctx, ids):
return 2 + F.zeros(shape, dtype, ctx)
g.set_n_initializer(_init2, 'x')
g.set_n_initializer(_init2, "x")
# test#1: pull both 0deg and non-0deg nodes
old = F.randn((2, 5))
g.ndata['h'] = old
g.ndata["h"] = old
g.pull([0, 1], _message, _reduce, _apply)
new = g.ndata['x']
new = g.ndata["x"]
# 0deg check: initialized with the func and got applied
assert F.allclose(new[0], F.full_1d(5, 4, dtype=F.float32))
# non-0deg check
......@@ -408,14 +454,15 @@ def test_pull_0deg(idtype):
# test#2: pull only 0deg node
old = F.randn((2, 5))
g.ndata['h'] = old
g.pull(0, _message, _reduce, lambda nodes : {'h' : nodes.data['h'] * 2})
new = g.ndata['h']
g.ndata["h"] = old
g.pull(0, _message, _reduce, lambda nodes: {"h": nodes.data["h"] * 2})
new = g.ndata["h"]
# 0deg check: fallback to apply
assert F.allclose(new[0], 2*old[0])
assert F.allclose(new[0], 2 * old[0])
# non-0deg check: not touched
assert F.allclose(new[1], old[1])
def test_dynamic_addition():
N = 3
D = 1
......@@ -425,201 +472,242 @@ def test_dynamic_addition():
# Test node addition
g.add_nodes(N)
g.ndata.update({'h1': F.randn((N, D)),
'h2': F.randn((N, D))})
g.ndata.update({"h1": F.randn((N, D)), "h2": F.randn((N, D))})
g.add_nodes(3)
assert g.ndata['h1'].shape[0] == g.ndata['h2'].shape[0] == N + 3
assert g.ndata["h1"].shape[0] == g.ndata["h2"].shape[0] == N + 3
# Test edge addition
g.add_edges(0, 1)
g.add_edges(1, 0)
g.edata.update({'h1': F.randn((2, D)),
'h2': F.randn((2, D))})
assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 2
g.edata.update({"h1": F.randn((2, D)), "h2": F.randn((2, D))})
assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 2
g.add_edges([0, 2], [2, 0])
g.edata['h1'] = F.randn((4, D))
assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 4
g.edata["h1"] = F.randn((4, D))
assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 4
g.add_edges(1, 2)
g.edges[4].data['h1'] = F.randn((1, D))
assert g.edata['h1'].shape[0] == g.edata['h2'].shape[0] == 5
g.edges[4].data["h1"] = F.randn((1, D))
assert g.edata["h1"].shape[0] == g.edata["h2"].shape[0] == 5
# test add edge with part of the features
g.add_edges(2, 1, {'h1': F.randn((1, D))})
assert len(g.edata['h1']) == len(g.edata['h2'])
g.add_edges(2, 1, {"h1": F.randn((1, D))})
assert len(g.edata["h1"]) == len(g.edata["h2"])
@parametrize_idtype
def test_repr(idtype):
g = dgl.graph(([0, 0, 1], [1, 2, 2]), num_nodes=10, idtype=idtype, device=F.ctx())
g = dgl.graph(
([0, 0, 1], [1, 2, 2]), num_nodes=10, idtype=idtype, device=F.ctx()
)
repr_string = g.__repr__()
print(repr_string)
g.ndata['x'] = F.zeros((10, 5))
g.edata['y'] = F.zeros((3, 4))
g.ndata["x"] = F.zeros((10, 5))
g.edata["y"] = F.zeros((3, 4))
repr_string = g.__repr__()
print(repr_string)
@parametrize_idtype
def test_local_var(idtype):
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
g.edata['w'] = F.zeros((g.number_of_edges(), 4))
g.ndata["h"] = F.zeros((g.number_of_nodes(), 3))
g.edata["w"] = F.zeros((g.number_of_edges(), 4))
# test override
def foo(g):
g = g.local_var()
g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
g.edata['w'] = F.ones((g.number_of_edges(), 4))
g.ndata["h"] = F.ones((g.number_of_nodes(), 3))
g.edata["w"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update
def foo(g):
g = g.local_var()
g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
g.edges[[2, 3]].data['w'] = F.ones((2, 4))
g.nodes[[2, 3]].data["h"] = F.ones((2, 3))
g.edges[[2, 3]].data["w"] = F.ones((2, 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update 2
def foo(g):
g = g.local_var()
g.apply_nodes(lambda nodes: {'h' : nodes.data['h'] + 10}, [2, 3])
g.apply_edges(lambda edges: {'w' : edges.data['w'] + 10}, [2, 3])
g.apply_nodes(lambda nodes: {"h": nodes.data["h"] + 10}, [2, 3])
g.apply_edges(lambda edges: {"w": edges.data["w"] + 10}, [2, 3])
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test auto-pop
def foo(g):
g = g.local_var()
g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
g.edata['ww'] = F.ones((g.number_of_edges(), 4))
g.ndata["hh"] = F.ones((g.number_of_nodes(), 3))
g.edata["ww"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert 'hh' not in g.ndata
assert 'ww' not in g.edata
assert "hh" not in g.ndata
assert "ww" not in g.edata
# test initializer1
g = dgl.graph(([0, 1], [1, 1]), idtype=idtype, device=F.ctx())
g.set_n_initializer(dgl.init.zero_initializer)
def foo(g):
g = g.local_var()
g.nodes[0].data['h'] = F.ones((1, 1))
assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
g.nodes[0].data["h"] = F.ones((1, 1))
assert F.allclose(g.ndata["h"], F.tensor([[1.0], [0.0]]))
foo(g)
# test initializer2
def foo_e_initializer(shape, dtype, ctx, id_range):
return F.ones(shape)
g.set_e_initializer(foo_e_initializer, field='h')
g.set_e_initializer(foo_e_initializer, field="h")
def foo(g):
g = g.local_var()
g.edges[0, 1].data['h'] = F.ones((1, 1))
assert F.allclose(g.edata['h'], F.ones((2, 1)))
g.edges[0, 1].data['w'] = F.ones((1, 1))
assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
g.edges[0, 1].data["h"] = F.ones((1, 1))
assert F.allclose(g.edata["h"], F.ones((2, 1)))
g.edges[0, 1].data["w"] = F.ones((1, 1))
assert F.allclose(g.edata["w"], F.tensor([[1.0], [0.0]]))
foo(g)
@parametrize_idtype
def test_local_scope(idtype):
g = dgl.graph(([0, 1, 2, 3], [1, 2, 3, 4]), idtype=idtype, device=F.ctx())
g.ndata['h'] = F.zeros((g.number_of_nodes(), 3))
g.edata['w'] = F.zeros((g.number_of_edges(), 4))
g.ndata["h"] = F.zeros((g.number_of_nodes(), 3))
g.edata["w"] = F.zeros((g.number_of_edges(), 4))
# test override
def foo(g):
with g.local_scope():
g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
g.edata['w'] = F.ones((g.number_of_edges(), 4))
g.ndata["h"] = F.ones((g.number_of_nodes(), 3))
g.edata["w"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update
def foo(g):
with g.local_scope():
g.nodes[[2, 3]].data['h'] = F.ones((2, 3))
g.edges[[2, 3]].data['w'] = F.ones((2, 4))
g.nodes[[2, 3]].data["h"] = F.ones((2, 3))
g.edges[[2, 3]].data["w"] = F.ones((2, 4))
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test out-place update 2
def foo(g):
with g.local_scope():
g.apply_nodes(lambda nodes: {'h' : nodes.data['h'] + 10}, [2, 3])
g.apply_edges(lambda edges: {'w' : edges.data['w'] + 10}, [2, 3])
g.apply_nodes(lambda nodes: {"h": nodes.data["h"] + 10}, [2, 3])
g.apply_edges(lambda edges: {"w": edges.data["w"] + 10}, [2, 3])
foo(g)
assert F.allclose(g.ndata['h'], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata['w'], F.zeros((g.number_of_edges(), 4)))
assert F.allclose(g.ndata["h"], F.zeros((g.number_of_nodes(), 3)))
assert F.allclose(g.edata["w"], F.zeros((g.number_of_edges(), 4)))
# test auto-pop
def foo(g):
with g.local_scope():
g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
g.edata['ww'] = F.ones((g.number_of_edges(), 4))
g.ndata["hh"] = F.ones((g.number_of_nodes(), 3))
g.edata["ww"] = F.ones((g.number_of_edges(), 4))
foo(g)
assert 'hh' not in g.ndata
assert 'ww' not in g.edata
assert "hh" not in g.ndata
assert "ww" not in g.edata
# test nested scope
def foo(g):
with g.local_scope():
g.ndata['hh'] = F.ones((g.number_of_nodes(), 3))
g.edata['ww'] = F.ones((g.number_of_edges(), 4))
g.ndata["hh"] = F.ones((g.number_of_nodes(), 3))
g.edata["ww"] = F.ones((g.number_of_edges(), 4))
with g.local_scope():
g.ndata['hhh'] = F.ones((g.number_of_nodes(), 3))
g.edata['www'] = F.ones((g.number_of_edges(), 4))
assert 'hhh' not in g.ndata
assert 'www' not in g.edata
g.ndata["hhh"] = F.ones((g.number_of_nodes(), 3))
g.edata["www"] = F.ones((g.number_of_edges(), 4))
assert "hhh" not in g.ndata
assert "www" not in g.edata
foo(g)
assert 'hh' not in g.ndata
assert 'ww' not in g.edata
assert "hh" not in g.ndata
assert "ww" not in g.edata
# test initializer1
g = dgl.graph(([0, 1], [1, 1]), idtype=idtype, device=F.ctx())
g.set_n_initializer(dgl.init.zero_initializer)
def foo(g):
with g.local_scope():
g.nodes[0].data['h'] = F.ones((1, 1))
assert F.allclose(g.ndata['h'], F.tensor([[1.], [0.]]))
g.nodes[0].data["h"] = F.ones((1, 1))
assert F.allclose(g.ndata["h"], F.tensor([[1.0], [0.0]]))
foo(g)
# test initializer2
def foo_e_initializer(shape, dtype, ctx, id_range):
return F.ones(shape)
g.set_e_initializer(foo_e_initializer, field='h')
g.set_e_initializer(foo_e_initializer, field="h")
def foo(g):
with g.local_scope():
g.edges[0, 1].data['h'] = F.ones((1, 1))
assert F.allclose(g.edata['h'], F.ones((2, 1)))
g.edges[0, 1].data['w'] = F.ones((1, 1))
assert F.allclose(g.edata['w'], F.tensor([[1.], [0.]]))
g.edges[0, 1].data["h"] = F.ones((1, 1))
assert F.allclose(g.edata["h"], F.ones((2, 1)))
g.edges[0, 1].data["w"] = F.ones((1, 1))
assert F.allclose(g.edata["w"], F.tensor([[1.0], [0.0]]))
foo(g)
@parametrize_idtype
def test_isolated_nodes(idtype):
g = dgl.graph(([0, 1], [1, 2]), num_nodes=5, idtype=idtype, device=F.ctx())
assert g.number_of_nodes() == 5
g = dgl.heterograph({
('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])
}, {'user': 5, 'game': 7}, idtype=idtype, device=F.ctx())
g = dgl.heterograph(
{("user", "plays", "game"): ([0, 0, 1], [2, 3, 2])},
{"user": 5, "game": 7},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.number_of_nodes('user') == 5
assert g.number_of_nodes('game') == 7
assert g.number_of_nodes("user") == 5
assert g.number_of_nodes("game") == 7
# Test backward compatibility
g = dgl.heterograph({
('user', 'plays', 'game'): ([0, 0, 1], [2, 3, 2])
}, {'user': 5, 'game': 7}, idtype=idtype, device=F.ctx())
g = dgl.heterograph(
{("user", "plays", "game"): ([0, 0, 1], [2, 3, 2])},
{"user": 5, "game": 7},
idtype=idtype,
device=F.ctx(),
)
assert g.idtype == idtype
assert g.number_of_nodes('user') == 5
assert g.number_of_nodes('game') == 7
assert g.number_of_nodes("user") == 5
assert g.number_of_nodes("game") == 7
@parametrize_idtype
def test_send_multigraph(idtype):
g = dgl.graph(([0, 0, 0, 2], [1, 1, 1, 1]), idtype=idtype, device=F.ctx())
def _message_a(edges):
return {'a': edges.data['a']}
return {"a": edges.data["a"]}
def _message_b(edges):
return {'a': edges.data['a'] * 3}
return {"a": edges.data["a"] * 3}
def _reduce(nodes):
return {'a': F.max(nodes.mailbox['a'], 1)}
return {"a": F.max(nodes.mailbox["a"], 1)}
def answer(*args):
return F.max(F.stack(args, 0), 0)
......@@ -629,46 +717,60 @@ def test_send_multigraph(idtype):
# send by eid
old_repr = F.randn((4, 5))
# send_and_recv_on
g.ndata['a'] = F.zeros((3, 5))
g.edata['a'] = old_repr
g.ndata["a"] = F.zeros((3, 5))
g.edata["a"] = old_repr
g.send_and_recv([0, 2, 3], message_func=_message_a, reduce_func=_reduce)
new_repr = g.ndata['a']
assert F.allclose(new_repr[1], answer(old_repr[0], old_repr[2], old_repr[3]))
new_repr = g.ndata["a"]
assert F.allclose(
new_repr[1], answer(old_repr[0], old_repr[2], old_repr[3])
)
assert F.allclose(new_repr[[0, 2]], F.zeros((2, 5)))
@parametrize_idtype
def test_issue_1088(idtype):
# This test ensures that message passing on a heterograph with one edge type
# would not crash (GitHub issue #1088).
import dgl.function as fn
g = dgl.heterograph({('U', 'E', 'V'): ([0, 1, 2], [1, 2, 3])}, idtype=idtype, device=F.ctx())
g.nodes['U'].data['x'] = F.randn((3, 3))
g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'y'))
g = dgl.heterograph(
{("U", "E", "V"): ([0, 1, 2], [1, 2, 3])}, idtype=idtype, device=F.ctx()
)
g.nodes["U"].data["x"] = F.randn((3, 3))
g.update_all(fn.copy_u("x", "m"), fn.sum("m", "y"))
@parametrize_idtype
def test_degree_bucket_edge_ordering(idtype):
import dgl.function as fn
g = dgl.graph(
([1, 3, 5, 0, 4, 2, 3, 3, 4, 5], [1, 1, 0, 0, 1, 2, 2, 0, 3, 3]),
idtype=idtype, device=F.ctx())
g.edata['eid'] = F.copy_to(F.arange(0, 10), F.ctx())
idtype=idtype,
device=F.ctx(),
)
g.edata["eid"] = F.copy_to(F.arange(0, 10), F.ctx())
def reducer(nodes):
eid = F.asnumpy(F.copy_to(nodes.mailbox['eid'], F.cpu()))
eid = F.asnumpy(F.copy_to(nodes.mailbox["eid"], F.cpu()))
assert np.array_equal(eid, np.sort(eid, 1))
return {'n': F.sum(nodes.mailbox['eid'], 1)}
g.update_all(fn.copy_e('eid', 'eid'), reducer)
return {"n": F.sum(nodes.mailbox["eid"], 1)}
g.update_all(fn.copy_e("eid", "eid"), reducer)
@parametrize_idtype
def test_issue_2484(idtype):
import dgl.function as fn
g = dgl.graph(([0, 1, 2], [1, 2, 3]), idtype=idtype, device=F.ctx())
x = F.copy_to(F.randn((4,)), F.ctx())
g.ndata['x'] = x
g.pull([2, 1], fn.u_add_v('x', 'x', 'm'), fn.sum('m', 'x'))
y1 = g.ndata['x']
g.ndata["x"] = x
g.pull([2, 1], fn.u_add_v("x", "x", "m"), fn.sum("m", "x"))
y1 = g.ndata["x"]
g.ndata['x'] = x
g.pull([1, 2], fn.u_add_v('x', 'x', 'm'), fn.sum('m', 'x'))
y2 = g.ndata['x']
g.ndata["x"] = x
g.pull([1, 2], fn.u_add_v("x", "x", "m"), fn.sum("m", "x"))
y2 = g.ndata["x"]
assert F.allclose(y1, y2)
......@@ -4,18 +4,18 @@ import unittest
from collections import Counter
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
from dgl.ops import edge_softmax
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
edge_softmax_shapes = [(1,), (1, 3), (3, 4, 5)]
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
......
import dgl
import backend as F
import numpy as np
import unittest
from collections import defaultdict
import backend as F
import dgl
import numpy as np
import pytest
def check_random_walk(g, metapath, traces, ntypes, prob=None, trace_eids=None):
    """Validate random-walk output against the graph structure.

    Checks that (1) the node types along each trace match the metapath
    endpoints, (2) every consecutive node pair of a trace is connected by
    an edge of the corresponding edge type, (3) when ``prob`` names an
    edge feature present on that edge type, no zero-probability edge was
    traversed, and (4) when ``trace_eids`` is given, each edge ID maps
    back to the traversed node pair.
    """
    traces = F.asnumpy(traces)
    ntypes = F.asnumpy(ntypes)
    for j in range(traces.shape[1] - 1):
        assert ntypes[j] == g.get_ntype_id(g.to_canonical_etype(metapath[j])[0])
        assert ntypes[j + 1] == g.get_ntype_id(
            g.to_canonical_etype(metapath[j])[2]
        )

    for i in range(traces.shape[0]):
        for j in range(traces.shape[1] - 1):
            assert g.has_edges_between(
                traces[i, j], traces[i, j + 1], etype=metapath[j]
            )
            if prob is not None and prob in g.edges[metapath[j]].data:
                # BUG FIX: read the feature named by ``prob`` instead of the
                # hard-coded key "p".  Identical for the current callers in
                # this file (all pass prob="p") but correct for any other
                # feature name, matching the membership test above.
                p = F.asnumpy(g.edges[metapath[j]].data[prob])
                eids = g.edge_ids(
                    traces[i, j], traces[i, j + 1], etype=metapath[j]
                )
                assert p[eids] != 0
            if trace_eids is not None:
                u, v = g.find_edges(trace_eids[i, j], etype=metapath[j])
                assert (u == traces[i, j]) and (v == traces[i, j + 1])
@pytest.mark.parametrize("use_uva", [True, False])
def test_non_uniform_random_walk(use_uva):
    """Biased (edge-weighted) random walks on a homogeneous follow-graph
    and on a three-node-type heterograph, with and without UVA."""
    if use_uva:
        if F.ctx() == F.cpu():
            pytest.skip("UVA biased random walk requires a GPU.")
        if dgl.backend.backend_name != "pytorch":
            pytest.skip(
                "UVA biased random walk is only supported with PyTorch."
            )

    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g4 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
            ("user", "view", "item"): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
            ("item", "viewed-by", "user"): (
                [0, 1, 1, 2, 2, 1],
                [0, 0, 1, 2, 3, 3],
            ),
        }
    )
    # Edge weights; "p2" is deliberately 2-D (an invalid prob feature).
    g2.edata["p"] = F.copy_to(
        F.tensor([3, 0, 3, 3, 3], dtype=F.float32), F.cpu()
    )
    g2.edata["p2"] = F.copy_to(
        F.tensor([[3], [0], [3], [3], [3]], dtype=F.float32), F.cpu()
    )
    g4.edges["follow"].data["p"] = F.copy_to(
        F.tensor([3, 0, 3, 3, 3], dtype=F.float32), F.cpu()
    )
    g4.edges["viewed-by"].data["p"] = F.copy_to(
        F.tensor([1, 1, 1, 1, 1, 1], dtype=F.float32), F.cpu()
    )

    if use_uva:
        for g in (g2, g4):
            g.create_formats_()
            g.pin_memory_()
    elif F._default_context_str == "gpu":
        g2 = g2.to(F.ctx())
        g4 = g4.to(F.ctx())

    try:
        start2 = F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype)
        # Walks must only traverse nonzero-probability edges.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g2, start2, length=4, prob="p", return_eids=True
        )
        check_random_walk(
            g2, ["follow"] * 4, traces, ntypes, "p", trace_eids=eids
        )
        # A 2-D probability feature must be rejected.
        with pytest.raises(dgl.DGLError):
            traces, ntypes = dgl.sampling.random_walk(
                g2, start2, length=4, prob="p2"
            )

        metapath = ["follow", "view", "viewed-by"] * 2
        start4 = F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4, start4, metapath=metapath, prob="p", return_eids=True
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # A scalar restart probability of zero must not change the walk.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            start4,
            metapath=metapath,
            prob="p",
            restart_prob=0.0,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # Same for an all-zero per-step restart-probability tensor.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            start4,
            metapath=metapath,
            prob="p",
            restart_prob=F.zeros((6,), F.float32, F.ctx()),
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, "p", trace_eids=eids)
        # A certain restart on the appended last step truncates every trace
        # there, leaving -1 in the final column.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            start4,
            metapath=metapath + ["follow"],
            prob="p",
            restart_prob=F.tensor([0, 0, 0, 0, 0, 0, 1], F.float32),
            return_eids=True,
        )
        check_random_walk(
            g4, metapath, traces[:, :7], ntypes[:7], "p", trace_eids=eids
        )
        assert (F.asnumpy(traces[:, 7]) == -1).all()
    finally:
        # Unpins unconditionally, exactly as in the original block.
        for g in (g2, g4):
            g.unpin_memory_()
@pytest.mark.parametrize("use_uva", [True, False])
def test_uniform_random_walk(use_uva):
    """Uniform random walks: fixed-length walks, metapath walks, and the
    scalar / per-step restart-probability variants."""
    if use_uva and F.ctx() == F.cpu():
        pytest.skip("UVA random walk requires a GPU.")

    g1 = dgl.heterograph({("user", "follow", "user"): ([0, 1, 2], [1, 2, 0])})
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g3 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 2], [1, 2, 0]),
            ("user", "view", "item"): ([0, 1, 2], [0, 1, 2]),
            ("item", "viewed-by", "user"): ([0, 1, 2], [0, 1, 2]),
        }
    )
    g4 = dgl.heterograph(
        {
            ("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0]),
            ("user", "view", "item"): ([0, 0, 1, 2, 3, 3], [0, 1, 1, 2, 2, 1]),
            ("item", "viewed-by", "user"): (
                [0, 1, 1, 2, 2, 1],
                [0, 0, 1, 2, 3, 3],
            ),
        }
    )
    if use_uva:
        for g in (g1, g2, g3, g4):
            g.create_formats_()
            g.pin_memory_()
    elif F._default_context_str == "gpu":
        g1 = g1.to(F.ctx())
        g2 = g2.to(F.ctx())
        g3 = g3.to(F.ctx())
        # NOTE(review): this line falls in a hidden diff-hunk gap of the
        # scraped source; reconstructed by symmetry with g1-g3 above.
        g4 = g4.to(F.ctx())
    try:
        start1 = F.tensor([0, 1, 2, 0, 1, 2], dtype=g1.idtype)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g1, start1, length=4, return_eids=True
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Out-of-range seed node IDs are rejected on CPU.
        if F._default_context_str == "cpu":
            with pytest.raises(dgl.DGLError):
                dgl.sampling.random_walk(
                    g1,
                    F.tensor([0, 1, 2, 10], dtype=g1.idtype),
                    length=4,
                    return_eids=True,
                )
        # A scalar restart probability of zero leaves the walk intact.
        traces, eids, ntypes = dgl.sampling.random_walk(
            g1, start1, length=4, restart_prob=0.0, return_eids=True
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Same for an all-zero per-step restart-probability tensor.
        traces, ntypes = dgl.sampling.random_walk(
            g1, start1, length=4, restart_prob=F.zeros((4,), F.float32)
        )
        check_random_walk(g1, ["follow"] * 4, traces, ntypes)
        # A certain restart at the final step truncates every trace there,
        # leaving -1 in the last column.
        traces, ntypes = dgl.sampling.random_walk(
            g1,
            start1,
            length=5,
            restart_prob=F.tensor([0, 0, 0, 0, 1], dtype=F.float32),
        )
        check_random_walk(
            g1,
            ["follow"] * 4,
            F.slice_axis(traces, 1, 0, 5),
            F.slice_axis(ntypes, 0, 0, 5),
        )
        assert (F.asnumpy(traces)[:, 5] == -1).all()
        traces, eids, ntypes = dgl.sampling.random_walk(
            g2,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g2.idtype),
            length=4,
            return_eids=True,
        )
        check_random_walk(g2, ["follow"] * 4, traces, ntypes, trace_eids=eids)
        # Metapath-driven walks over the heterographs.
        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g3,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g3.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g3, metapath, traces, ntypes, trace_eids=eids)
        metapath = ["follow", "view", "viewed-by"] * 2
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 3, 0, 1, 2, 3], dtype=g4.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
        traces, eids, ntypes = dgl.sampling.random_walk(
            g4,
            F.tensor([0, 1, 2, 0, 1, 2], dtype=g4.idtype),
            metapath=metapath,
            return_eids=True,
        )
        check_random_walk(g4, metapath, traces, ntypes, trace_eids=eids)
    finally:  # make sure to unpin the graphs even if some test fails
        for g in (g1, g2, g3, g4):
            if g.is_pinned():
                g.unpin_memory_()
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU random walk not implemented"
)
def test_node2vec():
    """node2vec random walks, unweighted and edge-weighted."""
    g1 = dgl.heterograph({("user", "follow", "user"): ([0, 1, 2], [1, 2, 0])})
    g2 = dgl.heterograph(
        {("user", "follow", "user"): ([0, 1, 1, 2, 3], [1, 2, 3, 0, 0])}
    )
    g2.edata["p"] = F.tensor([3, 0, 3, 3, 3], dtype=F.float32)
    ntypes = F.zeros((5,), dtype=F.int64)

    traces, eids = dgl.sampling.node2vec_random_walk(
        g1, [0, 1, 2, 0, 1, 2], 1, 1, 4, return_eids=True
    )
    check_random_walk(g1, ["follow"] * 4, traces, ntypes, trace_eids=eids)
    # Weighted variant: zero-weight edges must never be traversed.
    traces, eids = dgl.sampling.node2vec_random_walk(
        g2, [0, 1, 2, 3, 0, 1, 2, 3], 1, 1, 4, prob="p", return_eids=True
    )
    check_random_walk(g2, ["follow"] * 4, traces, ntypes, "p", trace_eids=eids)
@unittest.skipIf(
    F._default_context_str == "gpu", reason="GPU pack traces not implemented"
)
def test_pack_traces():
    """Packing of padded random-walk traces into a flat representation."""
    # Two traces: the first terminates early (padded with -1), the second
    # runs the full length.
    raw_traces = np.array(
        [[0, 1, -1, -1, -1, -1, -1], [0, 1, 1, 3, 0, 0, 0]], dtype="int64"
    )
    raw_types = np.array([0, 0, 1, 0, 0, 1, 0], dtype="int64")
    traces = F.zerocopy_from_numpy(raw_traces)
    types = F.zerocopy_from_numpy(raw_types)

    result = dgl.sampling.pack_traces(traces, types)
    # Concatenated node IDs with the -1 padding removed.
    assert F.array_equal(
        result[0], F.tensor([0, 1, 0, 1, 1, 3, 0, 0, 0], dtype=F.int64)
    )
    # Per-node type IDs of the packed traces.
    assert F.array_equal(
        result[1], F.tensor([0, 0, 0, 0, 1, 0, 0, 1, 0], dtype=F.int64)
    )
    # Presumably the per-trace lengths and start offsets — values match
    # that reading (2 and 7 nodes, starting at 0 and 2).
    assert F.array_equal(result[2], F.tensor([2, 7], dtype=F.int64))
    assert F.array_equal(result[3], F.tensor([0, 2], dtype=F.int64))
# NOTE(review): this block is a scrape of a commit diff — the pre- and
# post-autoformat versions of many statements appear back to back, and the
# "......@@" hunk marker below stands in for hidden unchanged lines (after
# the homogeneous-graph sampler is built, the _test_sampler call and the
# matching "finally:" are hidden).  Code is left byte-identical; only
# comments are added.
@pytest.mark.parametrize('use_uva', [True, False])
@pytest.mark.parametrize("use_uva", [True, False])
def test_pinsage_sampling(use_uva):
    # UVA (pinned-memory) sampling needs a GPU.
    if use_uva and F.ctx() == F.cpu():
        pytest.skip('UVA sampling requires a GPU.')
        pytest.skip("UVA sampling requires a GPU.")
    def _test_sampler(g, sampler, ntype):
        # Sample the neighborhoods of nodes 0 and 2 and check the induced
        # edge endpoints of the returned single-type graph.
        seeds = F.copy_to(F.tensor([0, 2], dtype=g.idtype), F.ctx())
        neighbor_g = sampler(seeds)
        assert neighbor_g.ntypes == [ntype]
        u, v = neighbor_g.all_edges(form='uv', order='eid')
        u, v = neighbor_g.all_edges(form="uv", order="eid")
        uv = list(zip(F.asnumpy(u).tolist(), F.asnumpy(v).tolist()))
        assert (1, 0) in uv or (0, 0) in uv
        assert (2, 2) in uv or (3, 2) in uv
    g = dgl.heterograph({
        ('item', 'bought-by', 'user'): ([0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 2, 3, 2, 3]),
        ('user', 'bought', 'item'): ([0, 1, 0, 1, 2, 3, 2, 3], [0, 0, 1, 1, 2, 2, 3, 3])})
    g = dgl.heterograph(
        {
            ("item", "bought-by", "user"): (
                [0, 0, 1, 1, 2, 2, 3, 3],
                [0, 1, 0, 1, 2, 3, 2, 3],
            ),
            ("user", "bought", "item"): (
                [0, 1, 0, 1, 2, 3, 2, 3],
                [0, 0, 1, 1, 2, 2, 3, 3],
            ),
        }
    )
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == 'gpu':
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.PinSAGESampler(g, 'item', 'user', 4, 0.5, 3, 2)
        _test_sampler(g, sampler, 'item')
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2, ['bought-by', 'bought'])
        _test_sampler(g, sampler, 'item')
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2,
            [('item', 'bought-by', 'user'), ('user', 'bought', 'item')])
        _test_sampler(g, sampler, 'item')
        sampler = dgl.sampling.PinSAGESampler(g, "item", "user", 4, 0.5, 3, 2)
        _test_sampler(g, sampler, "item")
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g, 4, 0.5, 3, 2, ["bought-by", "bought"]
        )
        _test_sampler(g, sampler, "item")
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g,
            4,
            0.5,
            3,
            2,
            [("item", "bought-by", "user"), ("user", "bought", "item")],
        )
        _test_sampler(g, sampler, "item")
    finally:
        # Unpin on exit so a failing assertion does not leak pinned memory.
        if g.is_pinned():
            g.unpin_memory_()
    g = dgl.graph(([0, 0, 1, 1, 2, 2, 3, 3],
                   [0, 1, 0, 1, 2, 3, 2, 3]))
    g = dgl.graph(([0, 0, 1, 1, 2, 2, 3, 3], [0, 1, 0, 1, 2, 3, 2, 3]))
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == 'gpu':
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2)
......@@ -241,69 +371,116 @@ def test_pinsage_sampling(use_uva):
        if g.is_pinned():
            g.unpin_memory_()
    g = dgl.heterograph({
        ('A', 'AB', 'B'): ([0, 2], [1, 3]),
        ('B', 'BC', 'C'): ([1, 3], [2, 1]),
        ('C', 'CA', 'A'): ([2, 1], [0, 2])})
    g = dgl.heterograph(
        {
            ("A", "AB", "B"): ([0, 2], [1, 3]),
            ("B", "BC", "C"): ([1, 3], [2, 1]),
            ("C", "CA", "A"): ([2, 1], [0, 2]),
        }
    )
    if use_uva:
        g.create_formats_()
        g.pin_memory_()
    elif F._default_context_str == 'gpu':
    elif F._default_context_str == "gpu":
        g = g.to(F.ctx())
    try:
        sampler = dgl.sampling.RandomWalkNeighborSampler(g, 4, 0.5, 3, 2, ['AB', 'BC', 'CA'])
        _test_sampler(g, sampler, 'A')
        sampler = dgl.sampling.RandomWalkNeighborSampler(
            g, 4, 0.5, 3, 2, ["AB", "BC", "CA"]
        )
        _test_sampler(g, sampler, "A")
    finally:
        if g.is_pinned():
            g.unpin_memory_()
def _gen_neighbor_sampling_test_graph(hypersparse, reverse):
    """Build the (homogeneous ``g``, heterogeneous ``hg``) graph pair used
    by the neighbor-sampling tests.

    ``hypersparse`` allocates an astronomically large node space (1 << 50
    nodes per type) so that materializing a CSR would crash;
    ``reverse`` flips the direction of every edge.
    """
    if hypersparse:
        # should crash if allocated a CSR
        card = 1 << 50
        num_nodes_dict = {"user": card, "game": card, "coin": card}
    else:
        card = None
        num_nodes_dict = None

    if reverse:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                )
            },
            {"user": card if card is not None else 4},
        )
        g = g.to(F.ctx())
        g.edata["prob"] = F.tensor(
            [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
        )
        g.edata["mask"] = F.tensor([True, True, False, True, True, False, True])
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                ),
                ("game", "play", "user"): ([0, 1, 2, 2], [0, 0, 1, 3]),
                ("user", "liked-by", "game"): (
                    [0, 1, 2, 0, 3, 0],
                    [2, 2, 2, 1, 1, 0],
                ),
                ("coin", "flips", "user"): ([0, 0, 0, 0], [0, 1, 2, 3]),
            },
            num_nodes_dict,
        )
        hg = hg.to(F.ctx())
    else:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                )
            },
            {"user": card if card is not None else 4},
        )
        g = g.to(F.ctx())
        g.edata["prob"] = F.tensor(
            [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
        )
        g.edata["mask"] = F.tensor([True, True, False, True, True, False, True])
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                ),
                ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
                ("game", "liked-by", "user"): (
                    [2, 2, 2, 1, 1, 0],
                    [0, 1, 2, 0, 3, 0],
                ),
                ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
            },
            num_nodes_dict,
        )
        hg = hg.to(F.ctx())
    hg.edges["follow"].data["prob"] = F.tensor(
        [0.5, 0.5, 0.0, 0.5, 0.5, 0.0, 1.0], dtype=F.float32
    )
    hg.edges["follow"].data["mask"] = F.tensor(
        [True, True, False, True, True, False, True]
    )
    hg.edges["play"].data["prob"] = F.tensor(
        [0.8, 0.5, 0.5, 0.5], dtype=F.float32
    )
    # Leave out the mask of play and liked-by since all of them are True anyway.
    hg.edges["liked-by"].data["prob"] = F.tensor(
        [0.3, 0.5, 0.2, 0.5, 0.1, 0.1], dtype=F.float32
    )
    return g, hg
def _gen_neighbor_topk_test_graph(hypersparse, reverse):
    """Build the (homogeneous ``g``, heterogeneous ``hg``) graph pair used
    by the top-k neighbor-selection tests, with per-edge "weight" features.

    Mirrors _gen_neighbor_sampling_test_graph; ``reverse`` flips every edge.
    """
    if hypersparse:
        # should crash if allocated a CSR
        # NOTE(review): the two lines below sit in a hidden diff-hunk gap of
        # the scraped source; reconstructed from the identically-shaped
        # branch in _gen_neighbor_sampling_test_graph.
        card = 1 << 50
    else:
        card = None
    if reverse:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                )
            }
        )
        g.edata["weight"] = F.tensor(
            [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
        )
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [0, 0, 0, 1, 1, 1, 2],
                    [1, 2, 3, 0, 2, 3, 0],
                ),
                ("game", "play", "user"): ([0, 1, 2, 2], [0, 0, 1, 3]),
                ("user", "liked-by", "game"): (
                    [0, 1, 2, 0, 3, 0],
                    [2, 2, 2, 1, 1, 0],
                ),
                ("coin", "flips", "user"): ([0, 0, 0, 0], [0, 1, 2, 3]),
            }
        )
    else:
        g = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                )
            }
        )
        g.edata["weight"] = F.tensor(
            [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
        )
        hg = dgl.heterograph(
            {
                ("user", "follow", "user"): (
                    [1, 2, 3, 0, 2, 3, 0],
                    [0, 0, 0, 1, 1, 1, 2],
                ),
                ("user", "play", "game"): ([0, 0, 1, 3], [0, 1, 2, 2]),
                ("game", "liked-by", "user"): (
                    [2, 2, 2, 1, 1, 0],
                    [0, 1, 2, 0, 3, 0],
                ),
                ("user", "flips", "coin"): ([0, 1, 2, 3], [0, 0, 0, 0]),
            }
        )
    hg.edges["follow"].data["weight"] = F.tensor(
        [0.5, 0.3, 0.0, -5.0, 22.0, 0.0, 1.0], dtype=F.float32
    )
    hg.edges["play"].data["weight"] = F.tensor(
        [0.8, 0.5, 0.4, 0.5], dtype=F.float32
    )
    hg.edges["liked-by"].data["weight"] = F.tensor(
        [0.3, 0.5, 0.2, 0.5, 0.1, 0.1], dtype=F.float32
    )
    hg.edges["flips"].data["weight"] = F.tensor(
        [10, 2, 13, -1], dtype=F.float32
    )
    return g, hg
# NOTE(review): this block is a scrape of a commit diff — old and new
# (auto-formatted) versions of many statements appear back to back, and the
# "......@@" hunk markers stand in for hidden unchanged lines (the
# uv_ans construction and some length assertions are hidden).  Code is left
# byte-identical; only comments are added.
def _test_sample_neighbors(hypersparse, prob):
    # Exercises dgl.sampling.sample_neighbors with the default
    # edge_dir="in" on the graphs from _gen_neighbor_sampling_test_graph.
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, False)
    def _test1(p, replace):
        # fanout=-1 selects every (eligible) in-edge of nodes 0 and 1.
        subg = dgl.sampling.sample_neighbors(g, [0, 1], -1, prob=p, replace=replace)
        subg = dgl.sampling.sample_neighbors(
            g, [0, 1], -1, prob=p, replace=replace
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.in_edges([0, 1], form='all')
        u_ans, v_ans, e_ans = g.in_edges([0, 1], form="all")
        if p is not None:
            # Drop edges whose probability/mask feature excludes them.
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -360,12 +576,17 @@ def _test_sample_neighbors(hypersparse, prob):
        assert uv == uv_ans
        for i in range(10):
            # fanout=2 per seed: 4 sampled edges in total.
            subg = dgl.sampling.sample_neighbors(g, [0, 1], 2, prob=p, replace=replace)
            subg = dgl.sampling.sample_neighbors(
                g, [0, 1], 2, prob=p, replace=replace
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            assert subg.number_of_edges() == 4
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(v))) == {0, 1}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((4,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((4,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -374,18 +595,21 @@ def _test_sample_neighbors(hypersparse, prob):
        if p is not None:
            # Zero-probability edges must never be sampled.
            assert not (3, 0) in edge_set
            assert not (3, 1) in edge_set
    _test1(prob, True) # w/ replacement, uniform
    _test1(prob, True)  # w/ replacement, uniform
    _test1(prob, False)  # w/o replacement, uniform
    def _test2(p, replace):  # fanout > #neighbors
        subg = dgl.sampling.sample_neighbors(g, [0, 2], -1, prob=p, replace=replace)
        subg = dgl.sampling.sample_neighbors(
            g, [0, 2], -1, prob=p, replace=replace
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.in_edges([0, 2], form='all')
        u_ans, v_ans, e_ans = g.in_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -393,13 +617,18 @@ def _test_sample_neighbors(hypersparse, prob):
        assert uv == uv_ans
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(g, [0, 2], 2, prob=p, replace=replace)
            subg = dgl.sampling.sample_neighbors(
                g, [0, 2], 2, prob=p, replace=replace
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            # Node 2 has only one in-edge, so without replacement only 3
            # edges can be drawn in total.
            num_edges = 4 if replace else 3
            assert subg.number_of_edges() == num_edges
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(v))) == {0, 2}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((num_edges,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((num_edges,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -407,56 +636,65 @@ def _test_sample_neighbors(hypersparse, prob):
            assert len(edge_set) == num_edges
        if p is not None:
            assert not (3, 0) in edge_set
    _test2(prob, True) # w/ replacement, uniform
    _test2(prob, True)  # w/ replacement, uniform
    _test2(prob, False)  # w/o replacement, uniform
    def _test3(p, replace):
        # Heterograph variant with per-node-type seed dict.
        subg = dgl.sampling.sample_neighbors(hg, {'user': [0, 1], 'game': 0}, -1, prob=p, replace=replace)
        subg = dgl.sampling.sample_neighbors(
            hg, {"user": [0, 1], "game": 0}, -1, prob=p, replace=replace
        )
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg['follow'].number_of_edges() == 6 if p is None else 4
        assert subg['play'].number_of_edges() == 1
        assert subg['liked-by'].number_of_edges() == 4
        assert subg['flips'].number_of_edges() == 0
        assert subg["follow"].number_of_edges() == 6 if p is None else 4
        assert subg["play"].number_of_edges() == 1
        assert subg["liked-by"].number_of_edges() == 4
        assert subg["flips"].number_of_edges() == 0
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(hg, {'user' : [0,1], 'game' : 0}, 2, prob=p, replace=replace)
            subg = dgl.sampling.sample_neighbors(
                hg, {"user": [0, 1], "game": 0}, 2, prob=p, replace=replace
            )
            assert len(subg.ntypes) == 3
            assert len(subg.etypes) == 4
            assert subg['follow'].number_of_edges() == 4
            assert subg['play'].number_of_edges() == 2 if replace else 1
            assert subg['liked-by'].number_of_edges() == 4 if replace else 3
            assert subg['flips'].number_of_edges() == 0
            assert subg["follow"].number_of_edges() == 4
            assert subg["play"].number_of_edges() == 2 if replace else 1
            assert subg["liked-by"].number_of_edges() == 4 if replace else 3
            assert subg["flips"].number_of_edges() == 0
    _test3(prob, True) # w/ replacement, uniform
    _test3(prob, True)  # w/ replacement, uniform
    _test3(prob, False)  # w/o replacement, uniform
    # test different fanouts for different relations
    for i in range(10):
        subg = dgl.sampling.sample_neighbors(
            hg,
            {'user' : [0,1], 'game' : 0, 'coin': 0},
            {'follow': 1, 'play': 2, 'liked-by': 0, 'flips': -1},
            replace=True)
            {"user": [0, 1], "game": 0, "coin": 0},
            {"follow": 1, "play": 2, "liked-by": 0, "flips": -1},
            replace=True,
        )
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg['follow'].number_of_edges() == 2
        assert subg['play'].number_of_edges() == 2
        assert subg['liked-by'].number_of_edges() == 0
        assert subg['flips'].number_of_edges() == 4
        assert subg["follow"].number_of_edges() == 2
        assert subg["play"].number_of_edges() == 2
        assert subg["liked-by"].number_of_edges() == 0
        assert subg["flips"].number_of_edges() == 4
# NOTE(review): this block is a scrape of a commit diff — old and new
# (auto-formatted) versions of many statements appear back to back, and the
# "......@@" hunk markers stand in for hidden unchanged lines.  Code is
# left byte-identical; only comments are added.
def _test_sample_neighbors_outedge(hypersparse):
    # Same checks as _test_sample_neighbors but with edge_dir="out", on
    # the reversed graphs so the expected edge sets mirror the in-edge case.
    g, hg = _gen_neighbor_sampling_test_graph(hypersparse, True)
    def _test1(p, replace):
        # fanout=-1 selects every (eligible) out-edge of nodes 0 and 1.
        subg = dgl.sampling.sample_neighbors(g, [0, 1], -1, prob=p, replace=replace, edge_dir='out')
        subg = dgl.sampling.sample_neighbors(
            g, [0, 1], -1, prob=p, replace=replace, edge_dir="out"
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.out_edges([0, 1], form='all')
        u_ans, v_ans, e_ans = g.out_edges([0, 1], form="all")
        if p is not None:
            # Drop edges whose probability/mask feature excludes them.
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -464,12 +702,17 @@ def _test_sample_neighbors_outedge(hypersparse):
        assert uv == uv_ans
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(g, [0, 1], 2, prob=p, replace=replace, edge_dir='out')
            subg = dgl.sampling.sample_neighbors(
                g, [0, 1], 2, prob=p, replace=replace, edge_dir="out"
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            assert subg.number_of_edges() == 4
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(u))) == {0, 1}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((4,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((4,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -478,20 +721,23 @@ def _test_sample_neighbors_outedge(hypersparse):
        if p is not None:
            # Zero-probability edges must never be sampled.
            assert not (0, 3) in edge_set
            assert not (1, 3) in edge_set
    _test1(None, True) # w/ replacement, uniform
    _test1(None, True)  # w/ replacement, uniform
    _test1(None, False)  # w/o replacement, uniform
    _test1('prob', True)  # w/ replacement
    _test1('prob', False)  # w/o replacement
    _test1("prob", True)  # w/ replacement
    _test1("prob", False)  # w/o replacement
    def _test2(p, replace):  # fanout > #neighbors
        subg = dgl.sampling.sample_neighbors(g, [0, 2], -1, prob=p, replace=replace, edge_dir='out')
        subg = dgl.sampling.sample_neighbors(
            g, [0, 2], -1, prob=p, replace=replace, edge_dir="out"
        )
        assert subg.number_of_nodes() == g.number_of_nodes()
        u, v = subg.edges()
        u_ans, v_ans, e_ans = g.out_edges([0, 2], form='all')
        u_ans, v_ans, e_ans = g.out_edges([0, 2], form="all")
        if p is not None:
            emask = F.gather_row(g.edata[p], e_ans)
            if p == 'prob':
                emask = (emask != 0)
            if p == "prob":
                emask = emask != 0
            u_ans = F.boolean_mask(u_ans, emask)
            v_ans = F.boolean_mask(v_ans, emask)
        uv = set(zip(F.asnumpy(u), F.asnumpy(v)))
......@@ -499,13 +745,18 @@ def _test_sample_neighbors_outedge(hypersparse):
        assert uv == uv_ans
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(g, [0, 2], 2, prob=p, replace=replace, edge_dir='out')
            subg = dgl.sampling.sample_neighbors(
                g, [0, 2], 2, prob=p, replace=replace, edge_dir="out"
            )
            assert subg.number_of_nodes() == g.number_of_nodes()
            # Node 2 has a single out-edge, so without replacement only 3
            # edges can be drawn in total.
            num_edges = 4 if replace else 3
            assert subg.number_of_edges() == num_edges
            u, v = subg.edges()
            assert set(F.asnumpy(F.unique(u))) == {0, 2}
            assert F.array_equal(F.astype(g.has_edges_between(u, v), F.int64), F.ones((num_edges,), dtype=F.int64))
            assert F.array_equal(
                F.astype(g.has_edges_between(u, v), F.int64),
                F.ones((num_edges,), dtype=F.int64),
            )
            assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
            edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
            if not replace:
......@@ -513,39 +764,55 @@ def _test_sample_neighbors_outedge(hypersparse):
            assert len(edge_set) == num_edges
        if p is not None:
            assert not (0, 3) in edge_set
    _test2(None, True) # w/ replacement, uniform
    _test2(None, True)  # w/ replacement, uniform
    _test2(None, False)  # w/o replacement, uniform
    _test2('prob', True)  # w/ replacement
    _test2('prob', False)  # w/o replacement
    _test2("prob", True)  # w/ replacement
    _test2("prob", False)  # w/o replacement
    def _test3(p, replace):
        # Heterograph variant with per-node-type seed dict.
        subg = dgl.sampling.sample_neighbors(hg, {'user': [0, 1], 'game': 0}, -1, prob=p, replace=replace, edge_dir='out')
        subg = dgl.sampling.sample_neighbors(
            hg,
            {"user": [0, 1], "game": 0},
            -1,
            prob=p,
            replace=replace,
            edge_dir="out",
        )
        assert len(subg.ntypes) == 3
        assert len(subg.etypes) == 4
        assert subg['follow'].number_of_edges() == 6 if p is None else 4
        assert subg['play'].number_of_edges() == 1
        assert subg['liked-by'].number_of_edges() == 4
        assert subg['flips'].number_of_edges() == 0
        assert subg["follow"].number_of_edges() == 6 if p is None else 4
        assert subg["play"].number_of_edges() == 1
        assert subg["liked-by"].number_of_edges() == 4
        assert subg["flips"].number_of_edges() == 0
        for i in range(10):
            subg = dgl.sampling.sample_neighbors(hg, {'user' : [0,1], 'game' : 0}, 2, prob=p, replace=replace, edge_dir='out')
            subg = dgl.sampling.sample_neighbors(
                hg,
                {"user": [0, 1], "game": 0},
                2,
                prob=p,
                replace=replace,
                edge_dir="out",
            )
            assert len(subg.ntypes) == 3
            assert len(subg.etypes) == 4
            assert subg['follow'].number_of_edges() == 4
            assert subg['play'].number_of_edges() == 2 if replace else 1
            assert subg['liked-by'].number_of_edges() == 4 if replace else 3
            assert subg['flips'].number_of_edges() == 0
            assert subg["follow"].number_of_edges() == 4
            assert subg["play"].number_of_edges() == 2 if replace else 1
            assert subg["liked-by"].number_of_edges() == 4 if replace else 3
            assert subg["flips"].number_of_edges() == 0
    _test3(None, True) # w/ replacement, uniform
    _test3(None, True)  # w/ replacement, uniform
    _test3(None, False)  # w/o replacement, uniform
    _test3('prob', True)  # w/ replacement
    _test3('prob', False)  # w/o replacement
    _test3("prob", True)  # w/ replacement
    _test3("prob", False)  # w/o replacement
def _test_sample_neighbors_topk(hypersparse):
g, hg = _gen_neighbor_topk_test_graph(hypersparse, False)
def _test1():
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 1])
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 1])
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.in_edges([0, 1])
......@@ -553,17 +820,18 @@ def _test_sample_neighbors_topk(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 1])
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 1])
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 4
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
assert edge_set == {(2,0),(1,0),(2,1),(3,1)}
assert edge_set == {(2, 0), (1, 0), (2, 1), (3, 1)}
_test1()
def _test2(): # k > #neighbors
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 2])
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 2])
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.in_edges([0, 2])
......@@ -571,49 +839,64 @@ def _test_sample_neighbors_topk(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 2])
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 2])
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 3
u, v = subg.edges()
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert edge_set == {(2,0),(1,0),(0,2)}
assert edge_set == {(2, 0), (1, 0), (0, 2)}
_test2()
def _test3():
subg = dgl.sampling.select_topk(hg, 2, 'weight', {'user' : [0,1], 'game' : 0})
subg = dgl.sampling.select_topk(
hg, 2, "weight", {"user": [0, 1], "game": 0}
)
assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4
u, v = subg['follow'].edges()
u, v = subg["follow"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
assert edge_set == {(2,0),(1,0),(2,1),(3,1)}
u, v = subg['play'].edges()
assert F.array_equal(
hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
)
assert edge_set == {(2, 0), (1, 0), (2, 1), (3, 1)}
u, v = subg["play"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
assert edge_set == {(0,0)}
u, v = subg['liked-by'].edges()
assert F.array_equal(
hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID]
)
assert edge_set == {(0, 0)}
u, v = subg["liked-by"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
assert edge_set == {(2,0),(2,1),(1,0)}
assert subg['flips'].number_of_edges() == 0
assert F.array_equal(
hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
)
assert edge_set == {(2, 0), (2, 1), (1, 0)}
assert subg["flips"].number_of_edges() == 0
_test3()
# test different k for different relations
subg = dgl.sampling.select_topk(
hg, {'follow': 1, 'play': 2, 'liked-by': 0, 'flips': -1}, 'weight', {'user' : [0,1], 'game' : 0, 'coin': 0})
hg,
{"follow": 1, "play": 2, "liked-by": 0, "flips": -1},
"weight",
{"user": [0, 1], "game": 0, "coin": 0},
)
assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4
assert subg['follow'].number_of_edges() == 2
assert subg['play'].number_of_edges() == 1
assert subg['liked-by'].number_of_edges() == 0
assert subg['flips'].number_of_edges() == 4
assert subg["follow"].number_of_edges() == 2
assert subg["play"].number_of_edges() == 1
assert subg["liked-by"].number_of_edges() == 0
assert subg["flips"].number_of_edges() == 4
def _test_sample_neighbors_topk_outedge(hypersparse):
g, hg = _gen_neighbor_topk_test_graph(hypersparse, True)
def _test1():
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 1], edge_dir='out')
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 1], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.out_edges([0, 1])
......@@ -621,17 +904,18 @@ def _test_sample_neighbors_topk_outedge(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 1], edge_dir='out')
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 1], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 4
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
assert edge_set == {(0,2),(0,1),(1,2),(1,3)}
assert edge_set == {(0, 2), (0, 1), (1, 2), (1, 3)}
_test1()
def _test2(): # k > #neighbors
subg = dgl.sampling.select_topk(g, -1, 'weight', [0, 2], edge_dir='out')
subg = dgl.sampling.select_topk(g, -1, "weight", [0, 2], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
u, v = subg.edges()
u_ans, v_ans = subg.out_edges([0, 2])
......@@ -639,118 +923,177 @@ def _test_sample_neighbors_topk_outedge(hypersparse):
uv_ans = set(zip(F.asnumpy(u_ans), F.asnumpy(v_ans)))
assert uv == uv_ans
subg = dgl.sampling.select_topk(g, 2, 'weight', [0, 2], edge_dir='out')
subg = dgl.sampling.select_topk(g, 2, "weight", [0, 2], edge_dir="out")
assert subg.number_of_nodes() == g.number_of_nodes()
assert subg.number_of_edges() == 3
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(g.edge_ids(u, v), subg.edata[dgl.EID])
assert edge_set == {(0,2),(0,1),(2,0)}
assert edge_set == {(0, 2), (0, 1), (2, 0)}
_test2()
def _test3():
subg = dgl.sampling.select_topk(hg, 2, 'weight', {'user' : [0,1], 'game' : 0}, edge_dir='out')
subg = dgl.sampling.select_topk(
hg, 2, "weight", {"user": [0, 1], "game": 0}, edge_dir="out"
)
assert len(subg.ntypes) == 3
assert len(subg.etypes) == 4
u, v = subg['follow'].edges()
u, v = subg["follow"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['follow'].edge_ids(u, v), subg['follow'].edata[dgl.EID])
assert edge_set == {(0,2),(0,1),(1,2),(1,3)}
u, v = subg['play'].edges()
assert F.array_equal(
hg["follow"].edge_ids(u, v), subg["follow"].edata[dgl.EID]
)
assert edge_set == {(0, 2), (0, 1), (1, 2), (1, 3)}
u, v = subg["play"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['play'].edge_ids(u, v), subg['play'].edata[dgl.EID])
assert edge_set == {(0,0)}
u, v = subg['liked-by'].edges()
assert F.array_equal(
hg["play"].edge_ids(u, v), subg["play"].edata[dgl.EID]
)
assert edge_set == {(0, 0)}
u, v = subg["liked-by"].edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
assert F.array_equal(hg['liked-by'].edge_ids(u, v), subg['liked-by'].edata[dgl.EID])
assert edge_set == {(0,2),(1,2),(0,1)}
assert subg['flips'].number_of_edges() == 0
assert F.array_equal(
hg["liked-by"].edge_ids(u, v), subg["liked-by"].edata[dgl.EID]
)
assert edge_set == {(0, 2), (1, 2), (0, 1)}
assert subg["flips"].number_of_edges() == 0
_test3()
def test_sample_neighbors_noprob():
_test_sample_neighbors(False, None)
#_test_sample_neighbors(True)
# _test_sample_neighbors(True)
def test_sample_neighbors_prob():
_test_sample_neighbors(False, 'prob')
#_test_sample_neighbors(True)
_test_sample_neighbors(False, "prob")
# _test_sample_neighbors(True)
def test_sample_neighbors_outedge():
_test_sample_neighbors_outedge(False)
#_test_sample_neighbors_outedge(True)
# _test_sample_neighbors_outedge(True)
@unittest.skipIf(F.backend_name == 'mxnet', reason='MXNet has problem converting bool arrays')
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors with mask not implemented")
@unittest.skipIf(
F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors with mask not implemented",
)
def test_sample_neighbors_mask():
_test_sample_neighbors(False, 'mask')
_test_sample_neighbors(False, "mask")
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_topk():
_test_sample_neighbors_topk(False)
#_test_sample_neighbors_topk(True)
# _test_sample_neighbors_topk(True)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_topk_outedge():
_test_sample_neighbors_topk_outedge(False)
#_test_sample_neighbors_topk_outedge(True)
# _test_sample_neighbors_topk_outedge(True)
def test_sample_neighbors_with_0deg():
g = dgl.graph(([], []), num_nodes=5).to(F.ctx())
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='in', replace=False)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="in", replace=False
)
assert sg.number_of_edges() == 0
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='in', replace=True)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="in", replace=True
)
assert sg.number_of_edges() == 0
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='out', replace=False)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="out", replace=False
)
assert sg.number_of_edges() == 0
sg = dgl.sampling.sample_neighbors(g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir='out', replace=True)
sg = dgl.sampling.sample_neighbors(
g, F.tensor([1, 2], dtype=F.int64), 2, edge_dir="out", replace=True
)
assert sg.number_of_edges() == 0
def create_test_graph(num_nodes, num_edges_per_node, bipartite=False):
src = np.concatenate(
[np.array([i] * num_edges_per_node) for i in range(num_nodes)])
[np.array([i] * num_edges_per_node) for i in range(num_nodes)]
)
dst = np.concatenate(
[np.random.choice(num_nodes, num_edges_per_node, replace=False) for i in range(num_nodes)]
[
np.random.choice(num_nodes, num_edges_per_node, replace=False)
for i in range(num_nodes)
]
)
if bipartite:
g = dgl.heterograph({("u", "e", "v") : (src, dst)})
g = dgl.heterograph({("u", "e", "v"): (src, dst)})
else:
g = dgl.graph((src, dst))
return g
def create_etype_test_graph(num_nodes, num_edges_per_node, rare_cnt):
src = np.concatenate(
[np.random.choice(num_nodes, num_edges_per_node, replace=False) for i in range(num_nodes)]
[
np.random.choice(num_nodes, num_edges_per_node, replace=False)
for i in range(num_nodes)
]
)
dst = np.concatenate(
[np.array([i] * num_edges_per_node) for i in range(num_nodes)])
[np.array([i] * num_edges_per_node) for i in range(num_nodes)]
)
minor_src = np.concatenate(
[np.random.choice(num_nodes, 2, replace=False) for i in range(num_nodes)]
[
np.random.choice(num_nodes, 2, replace=False)
for i in range(num_nodes)
]
)
minor_dst = np.concatenate(
[np.array([i] * 2) for i in range(num_nodes)])
minor_dst = np.concatenate([np.array([i] * 2) for i in range(num_nodes)])
most_zero_src = np.concatenate(
[np.random.choice(num_nodes, num_edges_per_node, replace=False) for i in range(rare_cnt)]
[
np.random.choice(num_nodes, num_edges_per_node, replace=False)
for i in range(rare_cnt)
]
)
most_zero_dst = np.concatenate(
[np.array([i] * num_edges_per_node) for i in range(rare_cnt)])
[np.array([i] * num_edges_per_node) for i in range(rare_cnt)]
)
g = dgl.heterograph({("v", "e_major", "u") : (src, dst),
("u", "e_major_rev", "v") : (dst, src),
("v2", "e_minor", "u") : (minor_src, minor_dst),
("v2", "most_zero", "u") : (most_zero_src, most_zero_dst),
("u", "e_minor_rev", "v2") : (minor_dst, minor_src)})
g = dgl.heterograph(
{
("v", "e_major", "u"): (src, dst),
("u", "e_major_rev", "v"): (dst, src),
("v2", "e_minor", "u"): (minor_src, minor_dst),
("v2", "most_zero", "u"): (most_zero_src, most_zero_dst),
("u", "e_minor_rev", "v2"): (minor_dst, minor_src),
}
)
for etype in g.etypes:
prob = np.random.rand(g.num_edges(etype))
prob[prob > 0.2] = 0
g.edges[etype].data['p'] = F.zerocopy_from_numpy(prob)
g.edges[etype].data['mask'] = F.zerocopy_from_numpy(prob != 0)
g.edges[etype].data["p"] = F.zerocopy_from_numpy(prob)
g.edges[etype].data["mask"] = F.zerocopy_from_numpy(prob != 0)
return g
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_biased_homogeneous():
g = create_test_graph(100, 30)
......@@ -769,7 +1112,9 @@ def test_sample_neighbors_biased_homogeneous():
# inedge / without replacement
g_sorted = dgl.sort_csc_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, replace=False
)
check_num(subg.edges()[0], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -777,13 +1122,17 @@ def test_sample_neighbors_biased_homogeneous():
# inedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, replace=True
)
check_num(subg.edges()[0], tag)
# outedge / without replacement
g_sorted = dgl.sort_csr_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, edge_dir='out', replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, edge_dir="out", replace=False
)
check_num(subg.edges()[1], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -791,14 +1140,21 @@ def test_sample_neighbors_biased_homogeneous():
# outedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.nodes(), 5, bias, edge_dir='out', replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.nodes(), 5, bias, edge_dir="out", replace=True
)
check_num(subg.edges()[1], tag)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
def test_sample_neighbors_biased_bipartite():
g = create_test_graph(100, 30, True)
num_dst = g.number_of_dst_nodes()
bias = F.tensor([0, 0.01, 10, 10], dtype=F.float32)
def check_num(nodes, tag):
nodes, tag = F.asnumpy(nodes), F.asnumpy(tag)
cnt = [sum(tag[nodes] == i) for i in range(4)]
......@@ -813,7 +1169,9 @@ def test_sample_neighbors_biased_bipartite():
tag = F.tensor(np.random.choice(4, 100))
g_sorted = dgl.sort_csc_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.dstnodes(), 5, bias, replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.dstnodes(), 5, bias, replace=False
)
check_num(subg.edges()[0], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -821,14 +1179,18 @@ def test_sample_neighbors_biased_bipartite():
# inedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.dstnodes(), 5, bias, replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.dstnodes(), 5, bias, replace=True
)
check_num(subg.edges()[0], tag)
# outedge / without replacement
tag = F.tensor(np.random.choice(4, num_dst))
g_sorted = dgl.sort_csr_by_tag(g, tag)
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.srcnodes(), 5, bias, edge_dir='out', replace=False)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.srcnodes(), 5, bias, edge_dir="out", replace=False
)
check_num(subg.edges()[1], tag)
u, v = subg.edges()
edge_set = set(zip(list(F.asnumpy(u)), list(F.asnumpy(v))))
......@@ -836,23 +1198,31 @@ def test_sample_neighbors_biased_bipartite():
# outedge / with replacement
for _ in range(5):
subg = dgl.sampling.sample_neighbors_biased(g_sorted, g.srcnodes(), 5, bias, edge_dir='out', replace=True)
subg = dgl.sampling.sample_neighbors_biased(
g_sorted, g.srcnodes(), 5, bias, edge_dir="out", replace=True
)
check_num(subg.edges()[1], tag)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(F.backend_name == 'mxnet', reason='MXNet has problem converting bool arrays')
@pytest.mark.parametrize('format_', ['coo', 'csr', 'csc'])
@pytest.mark.parametrize('direction', ['in', 'out'])
@pytest.mark.parametrize('replace', [False, True])
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
@unittest.skipIf(
F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@pytest.mark.parametrize("format_", ["coo", "csr", "csc"])
@pytest.mark.parametrize("direction", ["in", "out"])
@pytest.mark.parametrize("replace", [False, True])
def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
num_nodes = 100
rare_cnt = 4
g = create_etype_test_graph(100, 30, rare_cnt)
h_g = dgl.to_homogeneous(g, edata=['p', 'mask'])
h_g = dgl.to_homogeneous(g, edata=["p", "mask"])
h_g_etype = F.asnumpy(h_g.edata[dgl.ETYPE])
h_g_offset = np.cumsum(np.insert(np.bincount(h_g_etype), 0, 0)).tolist()
sg = g.edge_subgraph(g.edata['mask'], relabel_nodes=False)
h_sg = h_g.edge_subgraph(h_g.edata['mask'], relabel_nodes=False)
sg = g.edge_subgraph(g.edata["mask"], relabel_nodes=False)
h_sg = h_g.edge_subgraph(h_g.edata["mask"], relabel_nodes=False)
h_sg_etype = F.asnumpy(h_sg.edata[dgl.ETYPE])
h_sg_offset = np.cumsum(np.insert(np.bincount(h_sg_etype), 0, 0)).tolist()
......@@ -883,7 +1253,7 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
all_dst_per_etype.append(all_dst[all_etype_array == etype])
if replace:
if direction == 'in':
if direction == "in":
in_degree_per_etype = [np.bincount(d) for d in dst_per_etype]
for etype in range(len(fanouts)):
in_degree = in_degree_per_etype[etype]
......@@ -902,7 +1272,7 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
ans[all_src_per_etype[etype]] = fanout
assert np.all(out_degree == ans)
else:
if direction == 'in':
if direction == "in":
for v in set(dst):
u = src[dst == v]
et = etype_array[dst == v]
......@@ -911,7 +1281,9 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
for etype in set(et):
u_etype = set(u[et == etype])
all_u_etype = set(all_u[all_et == etype])
assert (len(u_etype) == fanouts[etype]) or (u_etype == all_u_etype)
assert (len(u_etype) == fanouts[etype]) or (
u_etype == all_u_etype
)
else:
for u in set(src):
v = dst[src == u]
......@@ -921,36 +1293,59 @@ def test_sample_neighbors_etype_homogeneous(format_, direction, replace):
for etype in set(et):
v_etype = set(v[et == etype])
all_v_etype = set(all_v[all_et == etype])
assert (len(v_etype) == fanouts[etype]) or (v_etype == all_v_etype)
assert (len(v_etype) == fanouts[etype]) or (
v_etype == all_v_etype
)
all_src, all_dst = h_g.edges()
all_sub_src, all_sub_dst = h_sg.edges()
h_g = h_g.formats(format_)
if (direction, format_) in [('in', 'csr'), ('out', 'csc')]:
h_g = h_g.formats(['csc', 'csr', 'coo'])
if (direction, format_) in [("in", "csr"), ("out", "csc")]:
h_g = h_g.formats(["csc", "csr", "coo"])
for _ in range(5):
subg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, replace=replace,
edge_dir=direction)
h_g, seeds, h_g_offset, fanouts, replace=replace, edge_dir=direction
)
check_num(h_g, all_src, all_dst, subg, replace, fanouts, direction)
p = [g.edges[etype].data['p'] for etype in g.etypes]
p = [g.edges[etype].data["p"] for etype in g.etypes]
subg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, replace=replace,
edge_dir=direction, prob=p)
check_num(h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction)
p = [g.edges[etype].data['mask'] for etype in g.etypes]
h_g,
seeds,
h_g_offset,
fanouts,
replace=replace,
edge_dir=direction,
prob=p,
)
check_num(
h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction
)
p = [g.edges[etype].data["mask"] for etype in g.etypes]
subg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, replace=replace,
edge_dir=direction, prob=p)
check_num(h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU sample neighbors not implemented")
@unittest.skipIf(F.backend_name == 'mxnet', reason='MXNet has problem converting bool arrays')
@pytest.mark.parametrize('format_', ['csr', 'csc'])
@pytest.mark.parametrize('direction', ['in', 'out'])
h_g,
seeds,
h_g_offset,
fanouts,
replace=replace,
edge_dir=direction,
prob=p,
)
check_num(
h_sg, all_sub_src, all_sub_dst, subg, replace, fanouts, direction
)
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU sample neighbors not implemented",
)
@unittest.skipIf(
F.backend_name == "mxnet", reason="MXNet has problem converting bool arrays"
)
@pytest.mark.parametrize("format_", ["csr", "csc"])
@pytest.mark.parametrize("direction", ["in", "out"])
def test_sample_neighbors_etype_sorted_homogeneous(format_, direction):
rare_cnt = 4
g = create_etype_test_graph(100, 30, rare_cnt)
......@@ -959,33 +1354,49 @@ def test_sample_neighbors_etype_sorted_homogeneous(format_, direction):
seeds = F.nonzero_1d(h_g.ndata[dgl.NTYPE] == seed_ntype)
fanouts = F.tensor([6, 5, -1, 3, 2], dtype=F.int64)
h_g = h_g.formats(format_)
if (direction, format_) in [('in', 'csr'), ('out', 'csc')]:
h_g = h_g.formats(['csc', 'csr', 'coo'])
if (direction, format_) in [("in", "csr"), ("out", "csc")]:
h_g = h_g.formats(["csc", "csr", "coo"])
if direction == 'in':
h_g = dgl.sort_csc_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type='edge')
if direction == "in":
h_g = dgl.sort_csc_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type="edge")
else:
h_g = dgl.sort_csr_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type='edge')
h_g = dgl.sort_csr_by_tag(h_g, h_g.edata[dgl.ETYPE], tag_type="edge")
# shuffle
h_g_etype = F.asnumpy(h_g.edata[dgl.ETYPE])
h_g_offset = np.cumsum(np.insert(np.bincount(h_g_etype), 0, 0)).tolist()
sg = dgl.sampling.sample_etype_neighbors(
h_g, seeds, h_g_offset, fanouts, edge_dir=direction, etype_sorted=True)
h_g, seeds, h_g_offset, fanouts, edge_dir=direction, etype_sorted=True
)
@pytest.mark.parametrize('dtype', ['int32', 'int64'])
@pytest.mark.parametrize("dtype", ["int32", "int64"])
def test_sample_neighbors_exclude_edges_heteroG(dtype):
d_i_d_u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300, size=100, dtype=dtype)))
d_i_d_v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=d_i_d_u_nodes.shape, dtype=dtype))
d_i_g_u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300, size=100, dtype=dtype)))
d_i_g_v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=d_i_g_u_nodes.shape, dtype=dtype))
d_t_d_u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300, size=100, dtype=dtype)))
d_t_d_v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=d_t_d_u_nodes.shape, dtype=dtype))
g = dgl.heterograph({
('drug', 'interacts', 'drug'): (d_i_d_u_nodes, d_i_d_v_nodes),
('drug', 'interacts', 'gene'): (d_i_g_u_nodes, d_i_g_v_nodes),
('drug', 'treats', 'disease'): (d_t_d_u_nodes, d_t_d_v_nodes)
}).to(F.ctx())
d_i_d_u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
d_i_d_v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=d_i_d_u_nodes.shape, dtype=dtype)
)
d_i_g_u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
d_i_g_v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=d_i_g_u_nodes.shape, dtype=dtype)
)
d_t_d_u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
d_t_d_v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=d_t_d_u_nodes.shape, dtype=dtype)
)
g = dgl.heterograph(
{
("drug", "interacts", "drug"): (d_i_d_u_nodes, d_i_d_v_nodes),
("drug", "interacts", "gene"): (d_i_g_u_nodes, d_i_g_v_nodes),
("drug", "treats", "disease"): (d_t_d_u_nodes, d_t_d_v_nodes),
}
).to(F.ctx())
(U, V, EID) = (0, 1, 2)
......@@ -995,7 +1406,9 @@ def test_sample_neighbors_exclude_edges_heteroG(dtype):
did_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
sampled_amount = np.random.randint(low=1, high=10, dtype=dtype)
drug_i_drug_edges = g.all_edges(form='all', etype=('drug','interacts','drug'))
drug_i_drug_edges = g.all_edges(
form="all", etype=("drug", "interacts", "drug")
)
excluded_d_i_d_edges = drug_i_drug_edges[EID][did_b_idx:did_e_idx]
sampled_drug_node = drug_i_drug_edges[V][nd_b_idx:nd_e_idx]
did_excluded_nodes_U = drug_i_drug_edges[U][did_b_idx:did_e_idx]
......@@ -1005,7 +1418,9 @@ def test_sample_neighbors_exclude_edges_heteroG(dtype):
nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
dig_b_idx = np.random.randint(low=1, high=24, dtype=dtype)
dig_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
drug_i_gene_edges = g.all_edges(form='all', etype=('drug','interacts','gene'))
drug_i_gene_edges = g.all_edges(
form="all", etype=("drug", "interacts", "gene")
)
excluded_d_i_g_edges = drug_i_gene_edges[EID][dig_b_idx:dig_e_idx]
dig_excluded_nodes_U = drug_i_gene_edges[U][dig_b_idx:dig_e_idx]
dig_excluded_nodes_V = drug_i_gene_edges[V][dig_b_idx:dig_e_idx]
......@@ -1015,65 +1430,110 @@ def test_sample_neighbors_exclude_edges_heteroG(dtype):
nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
dtd_b_idx = np.random.randint(low=1, high=24, dtype=dtype)
dtd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
drug_t_dis_edges = g.all_edges(form='all', etype=('drug','treats','disease'))
drug_t_dis_edges = g.all_edges(
form="all", etype=("drug", "treats", "disease")
)
excluded_d_t_d_edges = drug_t_dis_edges[EID][dtd_b_idx:dtd_e_idx]
dtd_excluded_nodes_U = drug_t_dis_edges[U][dtd_b_idx:dtd_e_idx]
dtd_excluded_nodes_V = drug_t_dis_edges[V][dtd_b_idx:dtd_e_idx]
sampled_disease_node = drug_t_dis_edges[V][nd_b_idx:nd_e_idx]
excluded_edges = {('drug', 'interacts', 'drug'): excluded_d_i_d_edges,
('drug', 'interacts', 'gene'): excluded_d_i_g_edges,
('drug', 'treats', 'disease'): excluded_d_t_d_edges
}
sg = dgl.sampling.sample_neighbors(g, {'drug': sampled_drug_node,
'gene': sampled_gene_node,
'disease': sampled_disease_node},
sampled_amount, exclude_edges=excluded_edges)
assert not np.any(F.asnumpy(sg.has_edges_between(did_excluded_nodes_U,did_excluded_nodes_V,
etype=('drug','interacts','drug'))))
assert not np.any(F.asnumpy(sg.has_edges_between(dig_excluded_nodes_U,dig_excluded_nodes_V,
etype=('drug','interacts','gene'))))
assert not np.any(F.asnumpy(sg.has_edges_between(dtd_excluded_nodes_U,dtd_excluded_nodes_V,
etype=('drug','treats','disease'))))
@pytest.mark.parametrize('dtype', ['int32', 'int64'])
excluded_edges = {
("drug", "interacts", "drug"): excluded_d_i_d_edges,
("drug", "interacts", "gene"): excluded_d_i_g_edges,
("drug", "treats", "disease"): excluded_d_t_d_edges,
}
sg = dgl.sampling.sample_neighbors(
g,
{
"drug": sampled_drug_node,
"gene": sampled_gene_node,
"disease": sampled_disease_node,
},
sampled_amount,
exclude_edges=excluded_edges,
)
assert not np.any(
F.asnumpy(
sg.has_edges_between(
did_excluded_nodes_U,
did_excluded_nodes_V,
etype=("drug", "interacts", "drug"),
)
)
)
assert not np.any(
F.asnumpy(
sg.has_edges_between(
dig_excluded_nodes_U,
dig_excluded_nodes_V,
etype=("drug", "interacts", "gene"),
)
)
)
assert not np.any(
F.asnumpy(
sg.has_edges_between(
dtd_excluded_nodes_U,
dtd_excluded_nodes_V,
etype=("drug", "treats", "disease"),
)
)
)
@pytest.mark.parametrize("dtype", ["int32", "int64"])
def test_sample_neighbors_exclude_edges_homoG(dtype):
u_nodes = F.zerocopy_from_numpy(np.unique(np.random.randint(300,size=100, dtype=dtype)))
v_nodes = F.zerocopy_from_numpy(np.random.randint(25, size=u_nodes.shape, dtype=dtype))
u_nodes = F.zerocopy_from_numpy(
np.unique(np.random.randint(300, size=100, dtype=dtype))
)
v_nodes = F.zerocopy_from_numpy(
np.random.randint(25, size=u_nodes.shape, dtype=dtype)
)
g = dgl.graph((u_nodes, v_nodes)).to(F.ctx())
(U, V, EID) = (0, 1, 2)
nd_b_idx = np.random.randint(low=1,high=24, dtype=dtype)
nd_e_idx = np.random.randint(low=25,high=49, dtype=dtype)
b_idx = np.random.randint(low=1,high=24, dtype=dtype)
e_idx = np.random.randint(low=25,high=49, dtype=dtype)
sampled_amount = np.random.randint(low=1,high=10, dtype=dtype)
nd_b_idx = np.random.randint(low=1, high=24, dtype=dtype)
nd_e_idx = np.random.randint(low=25, high=49, dtype=dtype)
b_idx = np.random.randint(low=1, high=24, dtype=dtype)
e_idx = np.random.randint(low=25, high=49, dtype=dtype)
sampled_amount = np.random.randint(low=1, high=10, dtype=dtype)
g_edges = g.all_edges(form='all')
g_edges = g.all_edges(form="all")
excluded_edges = g_edges[EID][b_idx:e_idx]
sampled_node = g_edges[V][nd_b_idx:nd_e_idx]
excluded_nodes_U = g_edges[U][b_idx:e_idx]
excluded_nodes_V = g_edges[V][b_idx:e_idx]
sg = dgl.sampling.sample_neighbors(g, sampled_node,
sampled_amount, exclude_edges=excluded_edges)
sg = dgl.sampling.sample_neighbors(
g, sampled_node, sampled_amount, exclude_edges=excluded_edges
)
assert not np.any(
F.asnumpy(sg.has_edges_between(excluded_nodes_U, excluded_nodes_V))
)
assert not np.any(F.asnumpy(sg.has_edges_between(excluded_nodes_U,excluded_nodes_V)))
@pytest.mark.parametrize('dtype', ['int32', 'int64'])
@pytest.mark.parametrize("dtype", ["int32", "int64"])
def test_global_uniform_negative_sampling(dtype):
g = dgl.graph(([], []), num_nodes=1000).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 2000, False, True)
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 2000, False, True
)
assert len(src) == 2000
assert len(dst) == 2000
g = dgl.graph((np.random.randint(0, 20, (300,)), np.random.randint(0, 20, (300,)))).to(F.ctx())
g = dgl.graph(
(np.random.randint(0, 20, (300,)), np.random.randint(0, 20, (300,)))
).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, True)
assert not F.asnumpy(g.has_edges_between(src, dst)).any()
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, False)
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 20, False, False
)
assert not F.asnumpy(g.has_edges_between(src, dst)).any()
src = F.asnumpy(src)
dst = F.asnumpy(dst)
......@@ -1081,7 +1541,9 @@ def test_global_uniform_negative_sampling(dtype):
assert len(s) == len(src)
g = dgl.graph(([0], [1])).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, True, False, redundancy=10)
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 20, True, False, redundancy=10
)
src = F.asnumpy(src)
dst = F.asnumpy(dst)
# should have either no element or (1, 0)
......@@ -1091,21 +1553,33 @@ def test_global_uniform_negative_sampling(dtype):
assert src[0] == 1
assert dst[0] == 0
g = dgl.heterograph({
('A', 'AB', 'B'): (np.random.randint(0, 20, (300,)), np.random.randint(0, 40, (300,))),
('B', 'BA', 'A'): (np.random.randint(0, 40, (200,)), np.random.randint(0, 20, (200,)))}).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(g, 20, False, etype='AB')
assert not F.asnumpy(g.has_edges_between(src, dst, etype='AB')).any()
g = dgl.heterograph(
{
("A", "AB", "B"): (
np.random.randint(0, 20, (300,)),
np.random.randint(0, 40, (300,)),
),
("B", "BA", "A"): (
np.random.randint(0, 40, (200,)),
np.random.randint(0, 20, (200,)),
),
}
).to(F.ctx())
src, dst = dgl.sampling.global_uniform_negative_sampling(
g, 20, False, etype="AB"
)
assert not F.asnumpy(g.has_edges_between(src, dst, etype="AB")).any()
if __name__ == '__main__':
if __name__ == "__main__":
from itertools import product
test_sample_neighbors_noprob()
test_sample_neighbors_prob()
test_sample_neighbors_mask()
for args in product(['coo', 'csr', 'csc'], ['in', 'out'], [False, True]):
for args in product(["coo", "csr", "csc"], ["in", "out"], [False, True]):
test_sample_neighbors_etype_homogeneous(*args)
for args in product(['csr', 'csc'], ['in', 'out']):
for args in product(["csr", "csc"], ["in", "out"]):
test_sample_neighbors_etype_sorted_homogeneous(*args)
test_non_uniform_random_walk(False)
test_uniform_random_walk(False)
......@@ -1117,7 +1591,7 @@ if __name__ == '__main__':
test_sample_neighbors_with_0deg()
test_sample_neighbors_biased_homogeneous()
test_sample_neighbors_biased_bipartite()
test_sample_neighbors_exclude_edges_heteroG('int32')
test_sample_neighbors_exclude_edges_homoG('int32')
test_global_uniform_negative_sampling('int32')
test_global_uniform_negative_sampling('int64')
test_sample_neighbors_exclude_edges_heteroG("int32")
test_sample_neighbors_exclude_edges_homoG("int32")
test_global_uniform_negative_sampling("int32")
test_global_uniform_negative_sampling("int64")
import unittest
import backend as F
import dgl
import numpy as np
import backend as F
import unittest
from test_utils import parametrize_idtype
def tree1(idtype):
"""Generate a tree
0
......@@ -19,10 +22,11 @@ def tree1(idtype):
g.add_edges(4, 1)
g.add_edges(1, 0)
g.add_edges(2, 0)
g.ndata['h'] = F.tensor([0, 1, 2, 3, 4])
g.edata['h'] = F.randn((4, 10))
g.ndata["h"] = F.tensor([0, 1, 2, 3, 4])
g.edata["h"] = F.randn((4, 10))
return g
def tree2(idtype):
"""Generate a tree
1
......@@ -38,10 +42,11 @@ def tree2(idtype):
g.add_edges(0, 4)
g.add_edges(4, 1)
g.add_edges(3, 1)
g.ndata['h'] = F.tensor([0, 1, 2, 3, 4])
g.edata['h'] = F.randn((4, 10))
g.ndata["h"] = F.tensor([0, 1, 2, 3, 4])
g.edata["h"] = F.randn((4, 10))
return g
@parametrize_idtype
def test_batch_unbatch(idtype):
t1 = tree1(idtype)
......@@ -55,10 +60,11 @@ def test_batch_unbatch(idtype):
assert F.allclose(bg.batch_num_edges(), F.tensor([4, 4]))
tt1, tt2 = dgl.unbatch(bg)
assert F.allclose(t1.ndata['h'], tt1.ndata['h'])
assert F.allclose(t1.edata['h'], tt1.edata['h'])
assert F.allclose(t2.ndata['h'], tt2.ndata['h'])
assert F.allclose(t2.edata['h'], tt2.edata['h'])
assert F.allclose(t1.ndata["h"], tt1.ndata["h"])
assert F.allclose(t1.edata["h"], tt1.edata["h"])
assert F.allclose(t2.ndata["h"], tt2.ndata["h"])
assert F.allclose(t2.edata["h"], tt2.edata["h"])
@parametrize_idtype
def test_batch_unbatch1(idtype):
......@@ -73,14 +79,18 @@ def test_batch_unbatch1(idtype):
assert F.allclose(b2.batch_num_edges(), F.tensor([4, 4, 4]))
s1, s2, s3 = dgl.unbatch(b2)
assert F.allclose(t2.ndata['h'], s1.ndata['h'])
assert F.allclose(t2.edata['h'], s1.edata['h'])
assert F.allclose(t1.ndata['h'], s2.ndata['h'])
assert F.allclose(t1.edata['h'], s2.edata['h'])
assert F.allclose(t2.ndata['h'], s3.ndata['h'])
assert F.allclose(t2.edata['h'], s3.edata['h'])
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TF doesn't support inplace update")
assert F.allclose(t2.ndata["h"], s1.ndata["h"])
assert F.allclose(t2.edata["h"], s1.edata["h"])
assert F.allclose(t1.ndata["h"], s2.ndata["h"])
assert F.allclose(t1.edata["h"], s2.edata["h"])
assert F.allclose(t2.ndata["h"], s3.ndata["h"])
assert F.allclose(t2.edata["h"], s3.edata["h"])
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TF doesn't support inplace update",
)
@parametrize_idtype
def test_batch_unbatch_frame(idtype):
"""Test module of node/edge frames of batched/unbatched DGLGraphs.
......@@ -93,30 +103,31 @@ def test_batch_unbatch_frame(idtype):
N2 = t2.number_of_nodes()
E2 = t2.number_of_edges()
D = 10
t1.ndata['h'] = F.randn((N1, D))
t1.edata['h'] = F.randn((E1, D))
t2.ndata['h'] = F.randn((N2, D))
t2.edata['h'] = F.randn((E2, D))
t1.ndata["h"] = F.randn((N1, D))
t1.edata["h"] = F.randn((E1, D))
t2.ndata["h"] = F.randn((N2, D))
t2.edata["h"] = F.randn((E2, D))
b1 = dgl.batch([t1, t2])
b2 = dgl.batch([t2])
b1.ndata['h'][:N1] = F.zeros((N1, D))
b1.edata['h'][:E1] = F.zeros((E1, D))
b2.ndata['h'][:N2] = F.zeros((N2, D))
b2.edata['h'][:E2] = F.zeros((E2, D))
assert not F.allclose(t1.ndata['h'], F.zeros((N1, D)))
assert not F.allclose(t1.edata['h'], F.zeros((E1, D)))
assert not F.allclose(t2.ndata['h'], F.zeros((N2, D)))
assert not F.allclose(t2.edata['h'], F.zeros((E2, D)))
b1.ndata["h"][:N1] = F.zeros((N1, D))
b1.edata["h"][:E1] = F.zeros((E1, D))
b2.ndata["h"][:N2] = F.zeros((N2, D))
b2.edata["h"][:E2] = F.zeros((E2, D))
assert not F.allclose(t1.ndata["h"], F.zeros((N1, D)))
assert not F.allclose(t1.edata["h"], F.zeros((E1, D)))
assert not F.allclose(t2.ndata["h"], F.zeros((N2, D)))
assert not F.allclose(t2.edata["h"], F.zeros((E2, D)))
g1, g2 = dgl.unbatch(b1)
_g2, = dgl.unbatch(b2)
assert F.allclose(g1.ndata['h'], F.zeros((N1, D)))
assert F.allclose(g1.edata['h'], F.zeros((E1, D)))
assert F.allclose(g2.ndata['h'], t2.ndata['h'])
assert F.allclose(g2.edata['h'], t2.edata['h'])
assert F.allclose(_g2.ndata['h'], F.zeros((N2, D)))
assert F.allclose(_g2.edata['h'], F.zeros((E2, D)))
(_g2,) = dgl.unbatch(b2)
assert F.allclose(g1.ndata["h"], F.zeros((N1, D)))
assert F.allclose(g1.edata["h"], F.zeros((E1, D)))
assert F.allclose(g2.ndata["h"], t2.ndata["h"])
assert F.allclose(g2.edata["h"], t2.edata["h"])
assert F.allclose(_g2.ndata["h"], F.zeros((N2, D)))
assert F.allclose(_g2.edata["h"], F.zeros((E2, D)))
@parametrize_idtype
def test_batch_unbatch2(idtype):
......@@ -128,10 +139,11 @@ def test_batch_unbatch2(idtype):
b.add_nodes(3)
b.add_edges(0, [1, 2])
c = dgl.batch([a, b])
c.ndata['h'] = F.ones((7, 1))
c.edata['w'] = F.ones((5, 1))
assert F.allclose(c.ndata['h'], F.ones((7, 1)))
assert F.allclose(c.edata['w'], F.ones((5, 1)))
c.ndata["h"] = F.ones((7, 1))
c.edata["w"] = F.ones((5, 1))
assert F.allclose(c.ndata["h"], F.ones((7, 1)))
assert F.allclose(c.edata["w"], F.ones((5, 1)))
@parametrize_idtype
def test_batch_send_and_recv(idtype):
......@@ -139,16 +151,17 @@ def test_batch_send_and_recv(idtype):
t2 = tree2(idtype)
bg = dgl.batch([t1, t2])
_mfunc = lambda edges: {'m' : edges.src['h']}
_rfunc = lambda nodes: {'h' : F.sum(nodes.mailbox['m'], 1)}
_mfunc = lambda edges: {"m": edges.src["h"]}
_rfunc = lambda nodes: {"h": F.sum(nodes.mailbox["m"], 1)}
u = [3, 4, 2 + 5, 0 + 5]
v = [1, 1, 4 + 5, 4 + 5]
bg.send_and_recv((u, v), _mfunc, _rfunc)
t1, t2 = dgl.unbatch(bg)
assert F.asnumpy(t1.ndata['h'][1]) == 7
assert F.asnumpy(t2.ndata['h'][4]) == 2
assert F.asnumpy(t1.ndata["h"][1]) == 7
assert F.asnumpy(t2.ndata["h"][4]) == 2
@parametrize_idtype
def test_batch_propagate(idtype):
......@@ -156,8 +169,8 @@ def test_batch_propagate(idtype):
t2 = tree2(idtype)
bg = dgl.batch([t1, t2])
_mfunc = lambda edges: {'m' : edges.src['h']}
_rfunc = lambda nodes: {'h' : F.sum(nodes.mailbox['m'], 1)}
_mfunc = lambda edges: {"m": edges.src["h"]}
_rfunc = lambda nodes: {"h": F.sum(nodes.mailbox["m"], 1)}
# get leaves.
order = []
......@@ -175,8 +188,9 @@ def test_batch_propagate(idtype):
bg.prop_edges(order, _mfunc, _rfunc)
t1, t2 = dgl.unbatch(bg)
assert F.asnumpy(t1.ndata['h'][0]) == 9
assert F.asnumpy(t2.ndata['h'][1]) == 5
assert F.asnumpy(t1.ndata["h"][0]) == 9
assert F.asnumpy(t2.ndata["h"][1]) == 5
@parametrize_idtype
def test_batched_edge_ordering(idtype):
......@@ -184,17 +198,18 @@ def test_batched_edge_ordering(idtype):
g1.add_nodes(6)
g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
e1 = F.randn((5, 10))
g1.edata['h'] = e1
g1.edata["h"] = e1
g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g2.add_nodes(6)
g2.add_edges([0, 1 ,2 ,5, 4 ,5], [1, 2, 3, 4, 3, 0])
g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
e2 = F.randn((6, 10))
g2.edata['h'] = e2
g2.edata["h"] = e2
g = dgl.batch([g1, g2])
r1 = g.edata['h'][g.edge_ids(4, 5)]
r2 = g1.edata['h'][g1.edge_ids(4, 5)]
r1 = g.edata["h"][g.edge_ids(4, 5)]
r2 = g1.edata["h"][g1.edge_ids(4, 5)]
assert F.array_equal(r1, r2)
@parametrize_idtype
def test_batch_no_edge(idtype):
g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
......@@ -202,22 +217,24 @@ def test_batch_no_edge(idtype):
g1.add_edges([4, 4, 2, 2, 0], [5, 3, 3, 1, 1])
g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g2.add_nodes(6)
g2.add_edges([0, 1, 2, 5, 4, 5], [1 ,2 ,3, 4, 3, 0])
g2.add_edges([0, 1, 2, 5, 4, 5], [1, 2, 3, 4, 3, 0])
g3 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g3.add_nodes(1) # no edges
g = dgl.batch([g1, g3, g2]) # should not throw an error
g = dgl.batch([g1, g3, g2]) # should not throw an error
@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
g1 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g1.ndata["nh"] = F.tensor([])
g1.edata["eh"] = F.tensor([])
g1.edata["eh"] = F.tensor([])
g2 = dgl.graph(([], [])).astype(idtype).to(F.ctx())
g2.ndata["nh"] = F.tensor([])
g2.edata["eh"] = F.tensor([])
g2.edata["eh"] = F.tensor([])
g = dgl.batch([g1, g2])
assert "nh" in g.ndata
assert "eh" in g.edata
assert "eh" in g.edata
def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
"""Internal function to compute batch information for subgraphs.
......@@ -235,12 +252,16 @@ def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
A dictionary mapping all node/edge type keys to the ``batch_num_objs``
array of corresponding graph.
"""
bucket_offset = np.expand_dims(np.cumsum(F.asnumpy(batch_num_objs), 0), -1) # (num_bkts, 1)
bucket_offset = np.expand_dims(
np.cumsum(F.asnumpy(batch_num_objs), 0), -1
) # (num_bkts, 1)
ret = {}
for key, induced_indices in zip(keys, induced_indices_arr):
# NOTE(Zihao): this implementation is not efficient and we can replace it with
# binary search in the future.
induced_indices = np.expand_dims(F.asnumpy(induced_indices), 0) # (1, num_nodes)
induced_indices = np.expand_dims(
F.asnumpy(induced_indices), 0
) # (1, num_nodes)
new_offset = np.sum((induced_indices < bucket_offset), 1) # (num_bkts,)
# start_offset = [0] + [new_offset[i-1] for i in range(1, n_bkts)]
start_offset = np.concatenate([np.zeros((1,)), new_offset[:-1]], 0)
......@@ -248,6 +269,7 @@ def _get_subgraph_batch_info(keys, induced_indices_arr, batch_num_objs):
ret[key] = F.tensor(new_batch_num_objs, dtype=F.dtype(batch_num_objs))
return ret
@parametrize_idtype
def test_set_batch_info(idtype):
ctx = F.ctx()
......@@ -257,13 +279,17 @@ def test_set_batch_info(idtype):
bg = dgl.batch([g1, g2])
batch_num_nodes = F.astype(bg.batch_num_nodes(), idtype)
batch_num_edges = F.astype(bg.batch_num_edges(), idtype)
# test homogeneous node subgraph
sg_n = dgl.node_subgraph(bg, list(range(10, 20)) + list(range(50, 60)))
induced_nodes = sg_n.ndata['_ID']
induced_edges = sg_n.edata['_ID']
new_batch_num_nodes = _get_subgraph_batch_info(bg.ntypes, [induced_nodes], batch_num_nodes)
new_batch_num_edges = _get_subgraph_batch_info(bg.canonical_etypes, [induced_edges], batch_num_edges)
induced_nodes = sg_n.ndata["_ID"]
induced_edges = sg_n.edata["_ID"]
new_batch_num_nodes = _get_subgraph_batch_info(
bg.ntypes, [induced_nodes], batch_num_nodes
)
new_batch_num_edges = _get_subgraph_batch_info(
bg.canonical_etypes, [induced_edges], batch_num_edges
)
sg_n.set_batch_num_nodes(new_batch_num_nodes)
sg_n.set_batch_num_edges(new_batch_num_edges)
subg_n1, subg_n2 = dgl.unbatch(sg_n)
......@@ -273,11 +299,17 @@ def test_set_batch_info(idtype):
assert subg_n2.num_edges() == subg2.num_edges()
# test homogeneous edge subgraph
sg_e = dgl.edge_subgraph(bg, list(range(40, 70)) + list(range(150, 200)), relabel_nodes=False)
sg_e = dgl.edge_subgraph(
bg, list(range(40, 70)) + list(range(150, 200)), relabel_nodes=False
)
induced_nodes = F.arange(0, bg.num_nodes(), idtype)
induced_edges = sg_e.edata['_ID']
new_batch_num_nodes = _get_subgraph_batch_info(bg.ntypes, [induced_nodes], batch_num_nodes)
new_batch_num_edges = _get_subgraph_batch_info(bg.canonical_etypes, [induced_edges], batch_num_edges)
induced_edges = sg_e.edata["_ID"]
new_batch_num_nodes = _get_subgraph_batch_info(
bg.ntypes, [induced_nodes], batch_num_nodes
)
new_batch_num_edges = _get_subgraph_batch_info(
bg.canonical_etypes, [induced_edges], batch_num_edges
)
sg_e.set_batch_num_nodes(new_batch_num_nodes)
sg_e.set_batch_num_edges(new_batch_num_edges)
subg_e1, subg_e2 = dgl.unbatch(sg_e)
......@@ -287,15 +319,14 @@ def test_set_batch_info(idtype):
assert subg_e2.num_nodes() == subg2.num_nodes()
if __name__ == '__main__':
#test_batch_unbatch()
#test_batch_unbatch1()
#test_batch_unbatch_frame()
#test_batch_unbatch2()
#test_batched_edge_ordering()
#test_batch_send_then_recv()
#test_batch_send_and_recv()
#test_batch_propagate()
#test_batch_no_edge()
if __name__ == "__main__":
# test_batch_unbatch()
# test_batch_unbatch1()
# test_batch_unbatch_frame()
# test_batch_unbatch2()
# test_batched_edge_ordering()
# test_batch_send_then_recv()
# test_batch_send_and_recv()
# test_batch_propagate()
# test_batch_no_edge()
test_set_batch_info(F.int32)
import dgl
import backend as F
import unittest
import pytest
import backend as F
import dgl
import pytest
from dgl.base import ALL
from test_utils import parametrize_idtype
from test_utils import check_graph_equal, get_cases
from test_utils import check_graph_equal, get_cases, parametrize_idtype
def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=None):
def check_equivalence_between_heterographs(
g1, g2, node_attrs=None, edge_attrs=None
):
assert g1.ntypes == g2.ntypes
assert g1.etypes == g2.etypes
assert g1.canonical_etypes == g2.canonical_etypes
......@@ -22,8 +24,8 @@ def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=N
for ety in g1.canonical_etypes:
assert g1.number_of_edges(ety) == g2.number_of_edges(ety)
src1, dst1, eid1 = g1.edges(etype=ety, form='all')
src2, dst2, eid2 = g2.edges(etype=ety, form='all')
src1, dst1, eid1 = g1.edges(etype=ety, form="all")
src2, dst2, eid2 = g2.edges(etype=ety, form="all")
assert F.allclose(src1, src2)
assert F.allclose(dst1, dst2)
assert F.allclose(eid1, eid2)
......@@ -34,7 +36,8 @@ def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=N
continue
for feat_name in node_attrs[nty]:
assert F.allclose(
g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name])
g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name]
)
if edge_attrs is not None:
for ety in edge_attrs.keys():
......@@ -42,10 +45,11 @@ def check_equivalence_between_heterographs(g1, g2, node_attrs=None, edge_attrs=N
continue
for feat_name in edge_attrs[ety]:
assert F.allclose(
g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name])
g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name]
)
@pytest.mark.parametrize('gs', get_cases(['two_hetero_batch']))
@pytest.mark.parametrize("gs", get_cases(["two_hetero_batch"]))
@parametrize_idtype
def test_topology(gs, idtype):
"""Test batching two DGLGraphs where some nodes are isolated in some relations"""
......@@ -65,30 +69,37 @@ def test_topology(gs, idtype):
for ntype in bg.ntypes:
print(ntype)
assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
g1.number_of_nodes(ntype), g2.number_of_nodes(ntype)]
g1.number_of_nodes(ntype),
g2.number_of_nodes(ntype),
]
assert bg.number_of_nodes(ntype) == (
g1.number_of_nodes(ntype) + g2.number_of_nodes(ntype))
g1.number_of_nodes(ntype) + g2.number_of_nodes(ntype)
)
# Test number of edges
for etype in bg.canonical_etypes:
assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
g1.number_of_edges(etype), g2.number_of_edges(etype)]
g1.number_of_edges(etype),
g2.number_of_edges(etype),
]
assert bg.number_of_edges(etype) == (
g1.number_of_edges(etype) + g2.number_of_edges(etype))
g1.number_of_edges(etype) + g2.number_of_edges(etype)
)
# Test relabeled nodes
for ntype in bg.ntypes:
assert list(F.asnumpy(bg.nodes(ntype))) == list(
range(bg.number_of_nodes(ntype)))
range(bg.number_of_nodes(ntype))
)
# Test relabeled edges
src, dst = bg.edges(etype=('user', 'follows', 'user'))
src, dst = bg.edges(etype=("user", "follows", "user"))
assert list(F.asnumpy(src)) == [0, 1, 4, 5]
assert list(F.asnumpy(dst)) == [1, 2, 5, 6]
src, dst = bg.edges(etype=('user', 'follows', 'developer'))
src, dst = bg.edges(etype=("user", "follows", "developer"))
assert list(F.asnumpy(src)) == [0, 1, 4, 5]
assert list(F.asnumpy(dst)) == [1, 2, 4, 5]
src, dst, eid = bg.edges(etype='plays', form='all')
src, dst, eid = bg.edges(etype="plays", form="all")
assert list(F.asnumpy(src)) == [0, 1, 2, 3, 4, 5, 6]
assert list(F.asnumpy(dst)) == [0, 0, 1, 1, 2, 2, 3]
assert list(F.asnumpy(eid)) == [0, 1, 2, 3, 4, 5, 6]
......@@ -113,19 +124,31 @@ def test_topology(gs, idtype):
@parametrize_idtype
def test_batching_batched(idtype):
"""Test batching a DGLGraph and a batched DGLGraph."""
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
bg1 = dgl.batch([g1, g2])
g3 = dgl.heterograph({
('user', 'follows', 'user'): ([0], [1]),
('user', 'plays', 'game'): ([1], [0])
}, idtype=idtype, device=F.ctx())
g3 = dgl.heterograph(
{
("user", "follows", "user"): ([0], [1]),
("user", "plays", "game"): ([1], [0]),
},
idtype=idtype,
device=F.ctx(),
)
bg2 = dgl.batch([bg1, g3])
assert bg2.idtype == idtype
assert bg2.device == F.ctx()
......@@ -137,27 +160,40 @@ def test_batching_batched(idtype):
# Test number of nodes
for ntype in bg2.ntypes:
assert F.asnumpy(bg2.batch_num_nodes(ntype)).tolist() == [
g1.number_of_nodes(ntype), g2.number_of_nodes(ntype), g3.number_of_nodes(ntype)]
g1.number_of_nodes(ntype),
g2.number_of_nodes(ntype),
g3.number_of_nodes(ntype),
]
assert bg2.number_of_nodes(ntype) == (
g1.number_of_nodes(ntype) + g2.number_of_nodes(ntype) + g3.number_of_nodes(ntype))
g1.number_of_nodes(ntype)
+ g2.number_of_nodes(ntype)
+ g3.number_of_nodes(ntype)
)
# Test number of edges
for etype in bg2.canonical_etypes:
assert F.asnumpy(bg2.batch_num_edges(etype)).tolist() == [
g1.number_of_edges(etype), g2.number_of_edges(etype), g3.number_of_edges(etype)]
g1.number_of_edges(etype),
g2.number_of_edges(etype),
g3.number_of_edges(etype),
]
assert bg2.number_of_edges(etype) == (
g1.number_of_edges(etype) + g2.number_of_edges(etype) + g3.number_of_edges(etype))
g1.number_of_edges(etype)
+ g2.number_of_edges(etype)
+ g3.number_of_edges(etype)
)
# Test relabeled nodes
for ntype in bg2.ntypes:
assert list(F.asnumpy(bg2.nodes(ntype))) == list(
range(bg2.number_of_nodes(ntype)))
range(bg2.number_of_nodes(ntype))
)
# Test relabeled edges
src, dst = bg2.edges(etype='follows')
src, dst = bg2.edges(etype="follows")
assert list(F.asnumpy(src)) == [0, 1, 3, 4, 6]
assert list(F.asnumpy(dst)) == [1, 2, 4, 5, 7]
src, dst = bg2.edges(etype='plays')
src, dst = bg2.edges(etype="plays")
assert list(F.asnumpy(src)) == [0, 1, 3, 4, 7]
assert list(F.asnumpy(dst)) == [0, 0, 1, 1, 2]
......@@ -171,136 +207,228 @@ def test_batching_batched(idtype):
@parametrize_idtype
def test_features(idtype):
"""Test the features of batched DGLGraphs"""
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g1.nodes['game'].data['h1'] = F.tensor([[0.]])
g1.nodes['game'].data['h2'] = F.tensor([[1.]])
g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g1.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g2.nodes['game'].data['h1'] = F.tensor([[0.]])
g2.nodes['game'].data['h2'] = F.tensor([[1.]])
g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g1.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g1.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g1.nodes["game"].data["h1"] = F.tensor([[0.0]])
g1.nodes["game"].data["h2"] = F.tensor([[1.0]])
g1.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g1.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g1.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g2.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g2.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g2.nodes["game"].data["h1"] = F.tensor([[0.0]])
g2.nodes["game"].data["h2"] = F.tensor([[1.0]])
g2.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g2.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g2.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])
# test default setting
bg = dgl.batch([g1, g2])
assert F.allclose(bg.nodes['user'].data['h1'],
F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0))
assert F.allclose(bg.nodes['user'].data['h2'],
F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
assert F.allclose(bg.nodes['game'].data['h1'],
F.cat([g1.nodes['game'].data['h1'], g2.nodes['game'].data['h1']], dim=0))
assert F.allclose(bg.nodes['game'].data['h2'],
F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']], dim=0))
assert F.allclose(bg.edges['follows'].data['h1'],
F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
assert F.allclose(bg.edges['follows'].data['h2'],
F.cat([g1.edges['follows'].data['h2'], g2.edges['follows'].data['h2']], dim=0))
assert F.allclose(bg.edges['plays'].data['h1'],
F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']], dim=0))
assert F.allclose(
bg.nodes["user"].data["h1"],
F.cat(
[g1.nodes["user"].data["h1"], g2.nodes["user"].data["h1"]], dim=0
),
)
assert F.allclose(
bg.nodes["user"].data["h2"],
F.cat(
[g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.nodes["game"].data["h1"],
F.cat(
[g1.nodes["game"].data["h1"], g2.nodes["game"].data["h1"]], dim=0
),
)
assert F.allclose(
bg.nodes["game"].data["h2"],
F.cat(
[g1.nodes["game"].data["h2"], g2.nodes["game"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.edges["follows"].data["h1"],
F.cat(
[g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
dim=0,
),
)
assert F.allclose(
bg.edges["follows"].data["h2"],
F.cat(
[g1.edges["follows"].data["h2"], g2.edges["follows"].data["h2"]],
dim=0,
),
)
assert F.allclose(
bg.edges["plays"].data["h1"],
F.cat(
[g1.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]], dim=0
),
)
# test specifying ndata/edata
bg = dgl.batch([g1, g2], ndata=['h2'], edata=['h1'])
assert F.allclose(bg.nodes['user'].data['h2'],
F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
assert F.allclose(bg.nodes['game'].data['h2'],
F.cat([g1.nodes['game'].data['h2'], g2.nodes['game'].data['h2']], dim=0))
assert F.allclose(bg.edges['follows'].data['h1'],
F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
assert F.allclose(bg.edges['plays'].data['h1'],
F.cat([g1.edges['plays'].data['h1'], g2.edges['plays'].data['h1']], dim=0))
assert 'h1' not in bg.nodes['user'].data
assert 'h1' not in bg.nodes['game'].data
assert 'h2' not in bg.edges['follows'].data
bg = dgl.batch([g1, g2], ndata=["h2"], edata=["h1"])
assert F.allclose(
bg.nodes["user"].data["h2"],
F.cat(
[g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.nodes["game"].data["h2"],
F.cat(
[g1.nodes["game"].data["h2"], g2.nodes["game"].data["h2"]], dim=0
),
)
assert F.allclose(
bg.edges["follows"].data["h1"],
F.cat(
[g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
dim=0,
),
)
assert F.allclose(
bg.edges["plays"].data["h1"],
F.cat(
[g1.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]], dim=0
),
)
assert "h1" not in bg.nodes["user"].data
assert "h1" not in bg.nodes["game"].data
assert "h2" not in bg.edges["follows"].data
# Test unbatching graphs
g3, g4 = dgl.unbatch(bg)
check_equivalence_between_heterographs(
g1, g3,
node_attrs={'user': ['h2'], 'game': ['h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g1,
g3,
node_attrs={"user": ["h2"], "game": ["h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
check_equivalence_between_heterographs(
g2, g4,
node_attrs={'user': ['h2'], 'game': ['h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g2,
g4,
node_attrs={"user": ["h2"], "game": ["h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
@unittest.skipIf(F.backend_name == 'mxnet', reason="MXNet does not support split array with zero-length segment.")
@unittest.skipIf(
F.backend_name == "mxnet",
reason="MXNet does not support split array with zero-length segment.",
)
@parametrize_idtype
def test_empty_relation(idtype):
"""Test the features of batched DGLGraphs"""
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([], [])
}, idtype=idtype, device=F.ctx())
g1.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g1.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g1.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g1.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0])
}, idtype=idtype, device=F.ctx())
g2.nodes['user'].data['h1'] = F.tensor([[0.], [1.], [2.]])
g2.nodes['user'].data['h2'] = F.tensor([[3.], [4.], [5.]])
g2.nodes['game'].data['h1'] = F.tensor([[0.]])
g2.nodes['game'].data['h2'] = F.tensor([[1.]])
g2.edges['follows'].data['h1'] = F.tensor([[0.], [1.]])
g2.edges['follows'].data['h2'] = F.tensor([[2.], [3.]])
g2.edges['plays'].data['h1'] = F.tensor([[0.], [1.]])
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([], []),
},
idtype=idtype,
device=F.ctx(),
)
g1.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g1.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g1.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g1.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
},
idtype=idtype,
device=F.ctx(),
)
g2.nodes["user"].data["h1"] = F.tensor([[0.0], [1.0], [2.0]])
g2.nodes["user"].data["h2"] = F.tensor([[3.0], [4.0], [5.0]])
g2.nodes["game"].data["h1"] = F.tensor([[0.0]])
g2.nodes["game"].data["h2"] = F.tensor([[1.0]])
g2.edges["follows"].data["h1"] = F.tensor([[0.0], [1.0]])
g2.edges["follows"].data["h2"] = F.tensor([[2.0], [3.0]])
g2.edges["plays"].data["h1"] = F.tensor([[0.0], [1.0]])
bg = dgl.batch([g1, g2])
# Test number of nodes
for ntype in bg.ntypes:
assert F.asnumpy(bg.batch_num_nodes(ntype)).tolist() == [
g1.number_of_nodes(ntype), g2.number_of_nodes(ntype)]
g1.number_of_nodes(ntype),
g2.number_of_nodes(ntype),
]
# Test number of edges
for etype in bg.canonical_etypes:
assert F.asnumpy(bg.batch_num_edges(etype)).tolist() == [
g1.number_of_edges(etype), g2.number_of_edges(etype)]
g1.number_of_edges(etype),
g2.number_of_edges(etype),
]
# Test features
assert F.allclose(bg.nodes['user'].data['h1'],
F.cat([g1.nodes['user'].data['h1'], g2.nodes['user'].data['h1']], dim=0))
assert F.allclose(bg.nodes['user'].data['h2'],
F.cat([g1.nodes['user'].data['h2'], g2.nodes['user'].data['h2']], dim=0))
assert F.allclose(bg.nodes['game'].data['h1'], g2.nodes['game'].data['h1'])
assert F.allclose(bg.nodes['game'].data['h2'], g2.nodes['game'].data['h2'])
assert F.allclose(bg.edges['follows'].data['h1'],
F.cat([g1.edges['follows'].data['h1'], g2.edges['follows'].data['h1']], dim=0))
assert F.allclose(bg.edges['plays'].data['h1'],
g2.edges['plays'].data['h1'])
assert F.allclose(
bg.nodes["user"].data["h1"],
F.cat(
[g1.nodes["user"].data["h1"], g2.nodes["user"].data["h1"]], dim=0
),
)
assert F.allclose(
bg.nodes["user"].data["h2"],
F.cat(
[g1.nodes["user"].data["h2"], g2.nodes["user"].data["h2"]], dim=0
),
)
assert F.allclose(bg.nodes["game"].data["h1"], g2.nodes["game"].data["h1"])
assert F.allclose(bg.nodes["game"].data["h2"], g2.nodes["game"].data["h2"])
assert F.allclose(
bg.edges["follows"].data["h1"],
F.cat(
[g1.edges["follows"].data["h1"], g2.edges["follows"].data["h1"]],
dim=0,
),
)
assert F.allclose(
bg.edges["plays"].data["h1"], g2.edges["plays"].data["h1"]
)
# Test unbatching graphs
g3, g4 = dgl.unbatch(bg)
check_equivalence_between_heterographs(
g1, g3,
node_attrs={'user': ['h1', 'h2'], 'game': ['h1', 'h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g1,
g3,
node_attrs={"user": ["h1", "h2"], "game": ["h1", "h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
check_equivalence_between_heterographs(
g2, g4,
node_attrs={'user': ['h1', 'h2'], 'game': ['h1', 'h2']},
edge_attrs={('user', 'follows', 'user'): ['h1']})
g2,
g4,
node_attrs={"user": ["h1", "h2"], "game": ["h1", "h2"]},
edge_attrs={("user", "follows", "user"): ["h1"]},
)
# Test graphs without edges
g1 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 0, 'v': 4})
g2 = dgl.heterograph({('u', 'r', 'v'): ([], [])}, {'u': 1, 'v': 5})
g1 = dgl.heterograph({("u", "r", "v"): ([], [])}, {"u": 0, "v": 4})
g2 = dgl.heterograph({("u", "r", "v"): ([], [])}, {"u": 1, "v": 5})
dgl.batch([g1, g2])
......@@ -314,10 +442,10 @@ def test_unbatch2(idtype):
bnn = F.tensor([8, 4])
bne = F.tensor([6, 3])
f1, f2 = dgl.unbatch(bg, node_split=bnn, edge_split=bne)
u, v = f1.edges(order='eid')
u, v = f1.edges(order="eid")
assert F.allclose(u, F.tensor([0, 1, 2, 4, 5, 6]))
assert F.allclose(v, F.tensor([1, 2, 3, 5, 6, 7]))
u, v = f2.edges(order='eid')
u, v = f2.edges(order="eid")
assert F.allclose(u, F.tensor([0, 1, 2]))
assert F.allclose(v, F.tensor([1, 2, 3]))
......@@ -331,28 +459,42 @@ def test_unbatch2(idtype):
@parametrize_idtype
def test_slice_batch(idtype):
g1 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([], []),
('user', 'follows', 'game'): ([0, 0], [1, 4])
}, idtype=idtype, device=F.ctx())
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1], [0, 0]),
('user', 'follows', 'game'): ([0, 1], [1, 4])
}, num_nodes_dict={'user': 4, 'game': 6}, idtype=idtype, device=F.ctx())
g3 = dgl.heterograph({
('user', 'follows', 'user'): ([0], [2]),
('user', 'plays', 'game'): ([1, 2], [3, 4]),
('user', 'follows', 'game'): ([], [])
}, idtype=idtype, device=F.ctx())
g1 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([], []),
("user", "follows", "game"): ([0, 0], [1, 4]),
},
idtype=idtype,
device=F.ctx(),
)
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1], [0, 0]),
("user", "follows", "game"): ([0, 1], [1, 4]),
},
num_nodes_dict={"user": 4, "game": 6},
idtype=idtype,
device=F.ctx(),
)
g3 = dgl.heterograph(
{
("user", "follows", "user"): ([0], [2]),
("user", "plays", "game"): ([1, 2], [3, 4]),
("user", "follows", "game"): ([], []),
},
idtype=idtype,
device=F.ctx(),
)
g_list = [g1, g2, g3]
bg = dgl.batch(g_list)
bg.nodes['user'].data['h1'] = F.randn((bg.num_nodes('user'), 2))
bg.nodes['user'].data['h2'] = F.randn((bg.num_nodes('user'), 5))
bg.edges[('user', 'follows', 'user')].data['h1'] = F.randn((
bg.num_edges(('user', 'follows', 'user')), 2))
for fmat in ['coo', 'csr', 'csc']:
bg.nodes["user"].data["h1"] = F.randn((bg.num_nodes("user"), 2))
bg.nodes["user"].data["h2"] = F.randn((bg.num_nodes("user"), 5))
bg.edges[("user", "follows", "user")].data["h1"] = F.randn(
(bg.num_edges(("user", "follows", "user")), 2)
)
for fmat in ["coo", "csr", "csc"]:
bg = bg.formats(fmat)
for i in range(len(g_list)):
g_i = g_list[i]
......@@ -364,22 +506,28 @@ def test_slice_batch(idtype):
for nty in g_i.ntypes:
assert g_i.num_nodes(nty) == g_slice.num_nodes(nty)
for feat in g_i.nodes[nty].data:
assert F.allclose(g_i.nodes[nty].data[feat], g_slice.nodes[nty].data[feat])
assert F.allclose(
g_i.nodes[nty].data[feat], g_slice.nodes[nty].data[feat]
)
for ety in g_i.canonical_etypes:
assert g_i.num_edges(ety) == g_slice.num_edges(ety)
for feat in g_i.edges[ety].data:
assert F.allclose(g_i.edges[ety].data[feat], g_slice.edges[ety].data[feat])
assert F.allclose(
g_i.edges[ety].data[feat], g_slice.edges[ety].data[feat]
)
@parametrize_idtype
def test_batch_keeps_empty_data(idtype):
g1 = dgl.heterograph({("a", "to", "a"): ([], [])}
).astype(idtype).to(F.ctx())
g1 = (
dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
)
g1.nodes["a"].data["nh"] = F.tensor([])
g1.edges[("a", "to", "a")].data["eh"] = F.tensor([])
g2 = dgl.heterograph({("a", "to", "a"): ([], [])}
).astype(idtype).to(F.ctx())
g2 = (
dgl.heterograph({("a", "to", "a"): ([], [])}).astype(idtype).to(F.ctx())
)
g2.nodes["a"].data["nh"] = F.tensor([])
g2.edges[("a", "to", "a")].data["eh"] = F.tensor([])
g = dgl.batch([g1, g2])
......@@ -387,27 +535,35 @@ def test_batch_keeps_empty_data(idtype):
assert "eh" in g.edges[("a", "to", "a")].data
@unittest.skipIf(F._default_context_str == 'gpu', reason="Issue is not related with GPU")
@unittest.skipIf(
F._default_context_str == "gpu", reason="Issue is not related with GPU"
)
def test_batch_netypes():
# Test for https://github.com/dmlc/dgl/issues/2808
import networkx as nx
B = nx.DiGraph()
B.add_nodes_from([1, 2, 3, 4], bipartite=0,
some_attr=F.tensor([1, 2, 3, 4], dtype=F.float32))
B.add_nodes_from(
[1, 2, 3, 4],
bipartite=0,
some_attr=F.tensor([1, 2, 3, 4], dtype=F.float32),
)
B.add_nodes_from(["a", "b", "c"], bipartite=1)
B.add_edges_from([(1, "a"), (1, "b"), (2, "b"),
(2, "c"), (3, "c"), (4, "a")])
g_dict = {0: dgl.bipartite_from_networkx(B, 'A', 'e', 'B'),
1: dgl.bipartite_from_networkx(B, 'B', 'e', 'A'),
2: dgl.bipartite_from_networkx(B, 'A', 'e', 'B', u_attrs=['some_attr']),
3: dgl.bipartite_from_networkx(B, 'B', 'e', 'A', u_attrs=['some_attr'])
}
B.add_edges_from(
[(1, "a"), (1, "b"), (2, "b"), (2, "c"), (3, "c"), (4, "a")]
)
g_dict = {
0: dgl.bipartite_from_networkx(B, "A", "e", "B"),
1: dgl.bipartite_from_networkx(B, "B", "e", "A"),
2: dgl.bipartite_from_networkx(B, "A", "e", "B", u_attrs=["some_attr"]),
3: dgl.bipartite_from_networkx(B, "B", "e", "A", u_attrs=["some_attr"]),
}
for _, g in g_dict.items():
dgl.batch((g, g, g))
if __name__ == '__main__':
if __name__ == "__main__":
# test_topology('int32')
# test_batching_batched('int32')
# test_batched_features('int32')
......
......@@ -2,10 +2,10 @@ import os
import unittest
import backend as F
import numpy as np
import pytest
import dgl
import numpy as np
import pytest
@unittest.skipIf(os.name == "nt", reason="Cython only works on linux")
......
......@@ -2,12 +2,12 @@ import pickle
import unittest
import backend as F
import numpy as np
from test_utils import parametrize_idtype
import dgl
import dgl.ndarray as nd
import numpy as np
from dgl.frame import Column
from test_utils import parametrize_idtype
def test_column_subcolumn():
......
import unittest
import backend as F
import numpy as np
import dgl
import numpy as np
@unittest.skipIf(
......
......@@ -4,18 +4,18 @@ from collections import Counter
from itertools import product
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from dgl import DGLError
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
fill_value = {"sum": 0, "max": float("-inf")}
feat_size = 2
......@@ -51,7 +51,6 @@ def create_test_heterograph(idtype):
@parametrize_idtype
def test_unary_copy_u(idtype):
def _test(mfunc):
g = create_test_heterograph(idtype)
x1 = F.randn((g.num_nodes("user"), feat_size))
......@@ -108,7 +107,6 @@ def test_unary_copy_u(idtype):
@parametrize_idtype
def test_unary_copy_e(idtype):
def _test(mfunc):
g = create_test_heterograph(idtype)
feat_size = 2
......@@ -168,7 +166,6 @@ def test_unary_copy_e(idtype):
@parametrize_idtype
def test_binary_op(idtype):
def _test(lhs, rhs, binary_op):
g = create_test_heterograph(idtype)
n1 = F.randn((g.num_nodes("user"), feat_size))
......@@ -237,6 +234,7 @@ def test_binary_op(idtype):
loss = F.sum(r2.view(-1), 0)
F.backward(loss)
n_grad2 = F.grad(g.nodes["game"].data["h"])
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
......
from itertools import product
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import backend as F
from itertools import product
from test_utils import parametrize_idtype, get_cases
import pytest
from test_utils import get_cases, parametrize_idtype
def udf_copy_src(edges):
return {'m': edges.src['u']}
return {"m": edges.src["u"]}
def udf_copy_edge(edges):
return {'m': edges.data['e']}
return {"m": edges.data["e"]}
def udf_mean(nodes):
return {'r2': F.mean(nodes.mailbox['m'], 1)}
return {"r2": F.mean(nodes.mailbox["m"], 1)}
def udf_sum(nodes):
return {'r2': F.sum(nodes.mailbox['m'], 1)}
return {"r2": F.sum(nodes.mailbox["m"], 1)}
def udf_max(nodes):
return {'r2': F.max(nodes.mailbox['m'], 1)}
return {"r2": F.max(nodes.mailbox["m"], 1)}
D1 = 5
D2 = 3
D3 = 4
D4 = 10 # NOTE(xiang): used to dot feature vector
builtin = {'sum': fn.sum, 'max': fn.max, 'mean': fn.mean}
udf_reduce = {'sum': udf_sum, 'max': udf_max, 'mean': udf_mean}
fill_value = {'sum': 0, 'max': float("-inf")}
D4 = 10 # NOTE(xiang): used to dot feature vector
builtin = {"sum": fn.sum, "max": fn.max, "mean": fn.mean}
udf_reduce = {"sum": udf_sum, "max": udf_max, "mean": udf_mean}
fill_value = {"sum": 0, "max": float("-inf")}
def generate_feature(g, broadcast='none', binary_op='none'):
def generate_feature(g, broadcast="none", binary_op="none"):
"""Create graph with src, edge, dst feature. broadcast can be 'u',
'e', 'v', 'none'
"""
np.random.seed(31)
nv = g.number_of_nodes()
ne = g.number_of_edges()
if binary_op == 'dot':
if broadcast == 'e':
if binary_op == "dot":
if broadcast == "e":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D2, 1, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
elif broadcast == 'u':
elif broadcast == "u":
u = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1, D4)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
elif broadcast == 'v':
elif broadcast == "v":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1, D4)))
......@@ -57,15 +64,15 @@ def generate_feature(g, broadcast='none', binary_op='none'):
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3, D4)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3, D4)))
else:
if broadcast == 'e':
if broadcast == "e":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D2, 1)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
elif broadcast == 'u':
elif broadcast == "u":
u = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
elif broadcast == 'v':
elif broadcast == "v":
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D2, 1)))
......@@ -73,7 +80,11 @@ def generate_feature(g, broadcast='none', binary_op='none'):
u = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
e = F.tensor(np.random.uniform(-1, 1, (ne, D1, D2, D3)))
v = F.tensor(np.random.uniform(-1, 1, (nv, D1, D2, D3)))
return F.astype(u, F.float32), F.astype(v, F.float32), F.astype(e, F.float32)
return (
F.astype(u, F.float32),
F.astype(v, F.float32),
F.astype(e, F.float32),
)
def test_copy_src_reduce():
......@@ -83,60 +94,65 @@ def test_copy_src_reduce():
# https://github.com/dmlc/dgl/issues/761
g.add_edges(g.nodes(), g.nodes())
g = g.to(F.ctx())
hu, hv, he = generate_feature(g, 'none', 'none')
hu, hv, he = generate_feature(g, "none", "none")
if partial:
nid = F.tensor(list(range(0, 100, 2)), g.idtype)
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, fn.copy_u(u='u', out='m'),
builtin[red](msg='m', out='r1'))
g.pull(
nid,
fn.copy_u(u="u", out="m"),
builtin[red](msg="m", out="r1"),
)
else:
g.update_all(fn.copy_u(u='u', out='m'),
builtin[red](msg='m', out='r1'))
r1 = g.ndata['r1']
g.update_all(
fn.copy_u(u="u", out="m"), builtin[red](msg="m", out="r1")
)
r1 = g.ndata["r1"]
F.backward(F.reduce_sum(r1))
n_grad1 = F.grad(g.ndata['u'])
n_grad1 = F.grad(g.ndata["u"])
# reset grad
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, udf_copy_src, udf_reduce[red])
else:
g.update_all(udf_copy_src, udf_reduce[red])
r2 = g.ndata['r2']
r2 = g.ndata["r2"]
F.backward(F.reduce_sum(r2))
n_grad2 = F.grad(g.ndata['u'])
n_grad2 = F.grad(g.ndata["u"])
def _print_error(a, b):
print("ERROR: Test copy_src_{} partial: {}".
format(red, partial))
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
print("ERROR: Test copy_src_{} partial: {}".format(red, partial))
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y))
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if not F.allclose(n_grad1, n_grad2):
print('node grad')
print("node grad")
_print_error(n_grad1, n_grad2)
assert(F.allclose(n_grad1, n_grad2))
assert F.allclose(n_grad1, n_grad2)
_test('sum', False)
_test('max', False)
_test('mean', False)
_test('sum', True)
_test('max', True)
_test('mean', True)
_test("sum", False)
_test("max", False)
_test("mean", False)
_test("sum", True)
_test("max", True)
_test("mean", True)
def test_copy_edge_reduce():
......@@ -145,80 +161,85 @@ def test_copy_edge_reduce():
# NOTE(zihao): add self-loop to avoid zero-degree nodes.
g.add_edges(g.nodes(), g.nodes())
g = g.to(F.ctx())
hu, hv, he = generate_feature(g, 'none', 'none')
hu, hv, he = generate_feature(g, "none", "none")
if partial:
nid = F.tensor(list(range(0, 100, 2)), g.idtype)
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, fn.copy_e(e='e', out='m'),
builtin[red](msg='m', out='r1'))
g.pull(
nid,
fn.copy_e(e="e", out="m"),
builtin[red](msg="m", out="r1"),
)
else:
g.update_all(fn.copy_e(e='e', out='m'),
builtin[red](msg='m', out='r1'))
r1 = g.ndata['r1']
g.update_all(
fn.copy_e(e="e", out="m"), builtin[red](msg="m", out="r1")
)
r1 = g.ndata["r1"]
F.backward(F.reduce_sum(r1))
e_grad1 = F.grad(g.edata['e'])
e_grad1 = F.grad(g.edata["e"])
# reset grad
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
with F.record_grad():
if partial:
g.pull(nid, udf_copy_edge, udf_reduce[red])
else:
g.update_all(udf_copy_edge, udf_reduce[red])
r2 = g.ndata['r2']
r2 = g.ndata["r2"]
F.backward(F.reduce_sum(r2))
e_grad2 = F.grad(g.edata['e'])
e_grad2 = F.grad(g.edata["e"])
def _print_error(a, b):
print("ERROR: Test copy_edge_{} partial: {}".
format(red, partial))
print("ERROR: Test copy_edge_{} partial: {}".format(red, partial))
return
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y):
print('@{} {} v.s. {}'.format(i, x, y))
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2):
_print_error(r1, r2)
assert F.allclose(r1, r2)
if not F.allclose(e_grad1, e_grad2):
print('edge gradient')
print("edge gradient")
_print_error(e_grad1, e_grad2)
assert(F.allclose(e_grad1, e_grad2))
assert F.allclose(e_grad1, e_grad2)
_test('sum', False)
_test('max', False)
_test('mean', False)
_test('sum', True)
_test('max', True)
_test('mean', True)
_test("sum", False)
_test("max", False)
_test("mean", False)
_test("sum", True)
_test("max", True)
_test("mean", True)
def test_all_binary_builtins():
def _test(g, lhs, rhs, binary_op, reducer, partial, nid, broadcast='none'):
def _test(g, lhs, rhs, binary_op, reducer, partial, nid, broadcast="none"):
# initialize node/edge features with uniform(-1, 1)
hu, hv, he = generate_feature(g, broadcast, binary_op)
if binary_op == 'div':
if binary_op == "div":
# op = div
# lhs range: [-1, 1]
# rhs range: [1, 2]
# result range: [-1, 1]
if rhs == 'u':
if rhs == "u":
hu = (hu + 3) / 2
elif rhs == 'v':
elif rhs == "v":
hv = (hv + 3) / 2
elif rhs == 'e':
elif rhs == "e":
he = (he + 3) / 2
if binary_op == 'add' or binary_op == 'sub':
if binary_op == "add" or binary_op == "sub":
# op = add, sub
# lhs range: [-1/2, 1/2]
# rhs range: [-1/2, 1/2]
......@@ -227,9 +248,9 @@ def test_all_binary_builtins():
hv = hv / 2
he = he / 2
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
builtin_msg_name = "{}_{}_{}".format(lhs, binary_op, rhs)
builtin_msg = getattr(fn, builtin_msg_name)
......@@ -245,18 +266,18 @@ def test_all_binary_builtins():
with F.record_grad():
if partial:
g.pull(nid, builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
g.pull(nid, builtin_msg(lhs, rhs, "m"), builtin_red("m", "r1"))
else:
g.update_all(builtin_msg(lhs, rhs, 'm'), builtin_red('m', 'r1'))
r1 = g.ndata.pop('r1')
g.update_all(builtin_msg(lhs, rhs, "m"), builtin_red("m", "r1"))
r1 = g.ndata.pop("r1")
F.backward(F.reduce_sum(r1))
lhs_grad_1 = F.grad(target_feature_switch(g, lhs))
rhs_grad_1 = F.grad(target_feature_switch(g, rhs))
# reset grad
g.ndata['u'] = F.attach_grad(F.clone(hu))
g.ndata['v'] = F.attach_grad(F.clone(hv))
g.edata['e'] = F.attach_grad(F.clone(he))
g.ndata["u"] = F.attach_grad(F.clone(hu))
g.ndata["v"] = F.attach_grad(F.clone(hv))
g.edata["e"] = F.attach_grad(F.clone(he))
def target_switch(edges, target):
if target == "u":
......@@ -266,7 +287,7 @@ def test_all_binary_builtins():
elif target == "e":
return edges.data
else:
assert(0), "Unknown target {}".format(target)
assert 0, "Unknown target {}".format(target)
def mfunc(edges):
op = getattr(F, binary_op)
......@@ -282,15 +303,15 @@ def test_all_binary_builtins():
def rfunc(nodes):
op = getattr(F, reducer)
return {"r2": op(nodes.mailbox['m'], 1)}
return {"r2": op(nodes.mailbox["m"], 1)}
with F.record_grad():
if partial:
g.pull(nid, mfunc, rfunc)
else:
g.update_all(mfunc, rfunc)
r2 = g.ndata.pop('r2')
F.backward(F.reduce_sum(r2), F.tensor([1.]))
r2 = g.ndata.pop("r2")
F.backward(F.reduce_sum(r2), F.tensor([1.0]))
lhs_grad_2 = F.grad(target_feature_switch(g, lhs))
rhs_grad_2 = F.grad(target_feature_switch(g, rhs))
......@@ -298,27 +319,32 @@ def test_all_binary_builtins():
atol = 1e-4
def _print_error(a, b):
print("ERROR: Test {}_{}_{}_{} broadcast: {} partial: {}".
format(lhs, binary_op, rhs, reducer, broadcast, partial))
print(
"ERROR: Test {}_{}_{}_{} broadcast: {} partial: {}".format(
lhs, binary_op, rhs, reducer, broadcast, partial
)
)
return
if lhs == 'u':
if lhs == "u":
lhs_data = hu
elif lhs == 'v':
elif lhs == "v":
lhs_data = hv
elif lhs == 'e':
elif lhs == "e":
lhs_data = he
if rhs == 'u':
if rhs == "u":
rhs_data = hu
elif rhs == 'v':
elif rhs == "v":
rhs_data = hv
elif rhs == 'e':
elif rhs == "e":
rhs_data = he
print("lhs", F.asnumpy(lhs_data).tolist())
print("rhs", F.asnumpy(rhs_data).tolist())
for i, (x, y) in enumerate(zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())):
for i, (x, y) in enumerate(
zip(F.asnumpy(a).flatten(), F.asnumpy(b).flatten())
):
if not np.allclose(x, y, rtol, atol):
print('@{} {} v.s. {}'.format(i, x, y))
print("@{} {} v.s. {}".format(i, x, y))
if not F.allclose(r1, r2, rtol, atol):
_print_error(r1, r2)
......@@ -327,12 +353,12 @@ def test_all_binary_builtins():
if not F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol):
print("left grad")
_print_error(lhs_grad_1, lhs_grad_2)
assert(F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol))
assert F.allclose(lhs_grad_1, lhs_grad_2, rtol, atol)
if not F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol):
print("right grad")
_print_error(rhs_grad_1, rhs_grad_2)
assert(F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol))
assert F.allclose(rhs_grad_1, rhs_grad_2, rtol, atol)
g = dgl.DGLGraph()
g.add_nodes(20)
......@@ -359,20 +385,30 @@ def test_all_binary_builtins():
for broadcast in ["none", lhs, rhs]:
for partial in [False, True]:
print(lhs, rhs, binary_op, reducer, broadcast, partial)
_test(g, lhs, rhs, binary_op, reducer, partial, nid,
broadcast=broadcast)
_test(
g,
lhs,
rhs,
binary_op,
reducer,
partial,
nid,
broadcast=broadcast,
)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo-zero-degree']))
@pytest.mark.parametrize("g", get_cases(["homo-zero-degree"]))
def test_mean_zero_degree(g, idtype):
g = g.astype(idtype).to(F.ctx())
g.ndata['h'] = F.ones((g.number_of_nodes(), 3))
g.update_all(fn.copy_u('h', 'm'), fn.mean('m', 'x'))
g.ndata["h"] = F.ones((g.number_of_nodes(), 3))
g.update_all(fn.copy_u("h", "m"), fn.mean("m", "x"))
deg = F.asnumpy(g.in_degrees())
v = F.tensor(np.where(deg == 0)[0])
assert F.allclose(F.gather_row(g.ndata['x'], v), F.zeros((len(v), 3)))
assert F.allclose(F.gather_row(g.ndata["x"], v), F.zeros((len(v), 3)))
if __name__ == '__main__':
if __name__ == "__main__":
test_copy_src_reduce()
test_copy_edge_reduce()
test_all_binary_builtins()
import math
import numbers
import backend as F
import dgl
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as sp
import networkx as nx
import dgl
import backend as F
from dgl import DGLError
import pytest
# graph generation: a random graph with 10 nodes
# and 20 edges.
......@@ -22,6 +25,7 @@ def edge_pair_input(sort=False):
dst = [9, 6, 3, 9, 4, 4, 9, 9, 1, 8, 3, 2, 8, 1, 5, 7, 3, 2, 6, 5]
return src, dst
def nx_input():
g = nx.DiGraph()
src, dst = edge_pair_input()
......@@ -29,22 +33,26 @@ def nx_input():
g.add_edge(*e, id=i)
return g
def elist_input():
src, dst = edge_pair_input()
return list(zip(src, dst))
def scipy_coo_input():
src, dst = edge_pair_input()
return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10,10))
return sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10))
def scipy_csr_input():
src, dst = edge_pair_input()
csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10,10)).tocsr()
csr = sp.coo_matrix((np.ones((20,)), (src, dst)), shape=(10, 10)).tocsr()
csr.sort_indices()
# src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
# dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
return csr
def gen_by_mutation():
g = dgl.DGLGraph()
src, dst = edge_pair_input()
......@@ -52,9 +60,11 @@ def gen_by_mutation():
g.add_edges(src, dst)
return g
def gen_from_data(data, readonly, sort):
return dgl.DGLGraph(data, readonly=readonly, sort_csr=True)
def test_query():
def _test_one(g):
assert g.number_of_nodes() == 10
......@@ -63,45 +73,63 @@ def test_query():
for i in range(10):
assert g.has_nodes(i)
assert not g.has_nodes(11)
assert F.allclose(g.has_nodes([0,2,10,11]), F.tensor([1,1,0,0]))
assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
src, dst = edge_pair_input()
for u, v in zip(src, dst):
assert g.has_edges_between(u, v)
assert not g.has_edges_between(0, 0)
assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0,1,1]))
assert set(F.asnumpy(g.predecessors(9))) == set([0,5,7,4])
assert set(F.asnumpy(g.successors(2))) == set([7,3])
assert F.allclose(
g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
)
assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
assert set(F.asnumpy(g.successors(2))) == set([7, 3])
assert g.edge_ids(4,4) == 5
assert F.allclose(g.edge_ids([4,0], [4,9]), F.tensor([5,0]))
assert g.edge_ids(4, 4) == 5
assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([5, 0]))
src, dst = g.find_edges([3, 6, 5])
assert F.allclose(src, F.tensor([5, 7, 4]))
assert F.allclose(dst, F.tensor([9, 9, 4]))
src, dst, eid = g.in_edges(9, form='all')
src, dst, eid = g.in_edges(9, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(5,9,3),(7,9,6),(4,9,7)])
src, dst, eid = g.in_edges([9,0,8], form='all') # test node#0 has no in edges
assert set(tup) == set([(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7)])
src, dst, eid = g.in_edges(
[9, 0, 8], form="all"
) # test node#0 has no in edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(5,9,3),(7,9,6),(4,9,7),(3,8,9),(7,8,12)])
assert set(tup) == set(
[(0, 9, 0), (5, 9, 3), (7, 9, 6), (4, 9, 7), (3, 8, 9), (7, 8, 12)]
)
src, dst, eid = g.out_edges(0, form='all')
src, dst, eid = g.out_edges(0, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(0,6,1),(0,4,4)])
src, dst, eid = g.out_edges([0,4,8], form='all') # test node#8 has no out edges
assert set(tup) == set([(0, 9, 0), (0, 6, 1), (0, 4, 4)])
src, dst, eid = g.out_edges(
[0, 4, 8], form="all"
) # test node#8 has no out edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,0),(0,6,1),(0,4,4),(4,3,2),(4,4,5),(4,9,7),(4,1,8)])
src, dst, eid = g.edges('all', 'eid')
assert set(tup) == set(
[
(0, 9, 0),
(0, 6, 1),
(0, 4, 4),
(4, 3, 2),
(4, 4, 5),
(4, 9, 7),
(4, 1, 8),
]
)
src, dst, eid = g.edges("all", "eid")
t_src, t_dst = edge_pair_input()
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(eid)) == list(range(20))
src, dst, eid = g.edges('all', 'srcdst')
src, dst, eid = g.edges("all", "srcdst")
t_src, t_dst = edge_pair_input()
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
......@@ -116,9 +144,13 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)),
scipy_coo_input().toarray().T,
)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)),
scipy_coo_input().toarray(),
)
def _test(g):
# test twice to see whether the cached format works or not
......@@ -132,48 +164,73 @@ def test_query():
for i in range(10):
assert g.has_nodes(i)
assert not g.has_nodes(11)
assert F.allclose(g.has_nodes([0,2,10,11]), F.tensor([1,1,0,0]))
assert F.allclose(g.has_nodes([0, 2, 10, 11]), F.tensor([1, 1, 0, 0]))
src, dst = edge_pair_input(sort=True)
for u, v in zip(src, dst):
assert g.has_edges_between(u, v)
assert not g.has_edges_between(0, 0)
assert F.allclose(g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0,1,1]))
assert set(F.asnumpy(g.predecessors(9))) == set([0,5,7,4])
assert set(F.asnumpy(g.successors(2))) == set([7,3])
assert F.allclose(
g.has_edges_between([0, 0, 3], [0, 9, 8]), F.tensor([0, 1, 1])
)
assert set(F.asnumpy(g.predecessors(9))) == set([0, 5, 7, 4])
assert set(F.asnumpy(g.successors(2))) == set([7, 3])
# src = [0 0 0 1 1 2 2 3 3 4 4 4 4 5 5 6 7 7 7 9]
# dst = [4 6 9 3 5 3 7 5 8 1 3 4 9 1 9 6 2 8 9 2]
# eid = [0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9]
assert g.edge_ids(4,4) == 11
assert F.allclose(g.edge_ids([4,0], [4,9]), F.tensor([11,2]))
assert g.edge_ids(4, 4) == 11
assert F.allclose(g.edge_ids([4, 0], [4, 9]), F.tensor([11, 2]))
src, dst = g.find_edges([3, 6, 5])
assert F.allclose(src, F.tensor([1, 2, 2]))
assert F.allclose(dst, F.tensor([3, 7, 3]))
src, dst, eid = g.in_edges(9, form='all')
src, dst, eid = g.in_edges(9, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(5,9,14),(7,9,18),(4,9,12)])
src, dst, eid = g.in_edges([9,0,8], form='all') # test node#0 has no in edges
assert set(tup) == set([(0, 9, 2), (5, 9, 14), (7, 9, 18), (4, 9, 12)])
src, dst, eid = g.in_edges(
[9, 0, 8], form="all"
) # test node#0 has no in edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(5,9,14),(7,9,18),(4,9,12),(3,8,8),(7,8,17)])
src, dst, eid = g.out_edges(0, form='all')
assert set(tup) == set(
[
(0, 9, 2),
(5, 9, 14),
(7, 9, 18),
(4, 9, 12),
(3, 8, 8),
(7, 8, 17),
]
)
src, dst, eid = g.out_edges(0, form="all")
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(0,6,1),(0,4,0)])
src, dst, eid = g.out_edges([0,4,8], form='all') # test node#8 has no out edges
assert set(tup) == set([(0, 9, 2), (0, 6, 1), (0, 4, 0)])
src, dst, eid = g.out_edges(
[0, 4, 8], form="all"
) # test node#8 has no out edges
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set([(0,9,2),(0,6,1),(0,4,0),(4,3,10),(4,4,11),(4,9,12),(4,1,9)])
src, dst, eid = g.edges('all', 'eid')
assert set(tup) == set(
[
(0, 9, 2),
(0, 6, 1),
(0, 4, 0),
(4, 3, 10),
(4, 4, 11),
(4, 9, 12),
(4, 1, 9),
]
)
src, dst, eid = g.edges("all", "eid")
t_src, t_dst = edge_pair_input(sort=True)
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
assert set(tup) == set(t_tup)
assert list(F.asnumpy(eid)) == list(range(20))
src, dst, eid = g.edges('all', 'srcdst')
src, dst, eid = g.edges("all", "srcdst")
t_src, t_dst = edge_pair_input(sort=True)
t_tup = list(zip(t_src, t_dst, list(range(20))))
tup = list(zip(F.asnumpy(src), F.asnumpy(dst), F.asnumpy(eid)))
......@@ -188,9 +245,13 @@ def test_query():
assert F.allclose(g.out_degrees([8, 9]), F.tensor([0, 1]))
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)), scipy_coo_input().toarray().T)
F.sparse_to_numpy(g.adjacency_matrix(transpose=True)),
scipy_coo_input().toarray().T,
)
assert np.array_equal(
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)), scipy_coo_input().toarray())
F.sparse_to_numpy(g.adjacency_matrix(transpose=False)),
scipy_coo_input().toarray(),
)
def _test_csr(g):
# test twice to see whether the cached format works or not
......@@ -199,18 +260,18 @@ def test_query():
def _test_edge_ids():
g = gen_by_mutation()
eids = g.edge_ids([4,0], [4,9])
eids = g.edge_ids([4, 0], [4, 9])
assert eids.shape[0] == 2
eid = g.edge_ids(4, 4)
assert isinstance(eid, numbers.Number)
with pytest.raises(DGLError):
eids = g.edge_ids([9,0], [4,9])
eids = g.edge_ids([9, 0], [4, 9])
with pytest.raises(DGLError):
eid = g.edge_ids(4, 5)
g.add_edges(0, 4)
eids = g.edge_ids([0,0], [4,9])
eids = g.edge_ids([0, 0], [4, 9])
eid = g.edge_ids(0, 4)
_test(gen_by_mutation())
......@@ -224,75 +285,104 @@ def test_query():
_test_csr(gen_from_data(scipy_csr_input(), True, False))
_test_edge_ids()
def test_mutation():
g = dgl.DGLGraph()
g = g.to(F.ctx())
# test add nodes with data
g.add_nodes(5)
g.add_nodes(5, {'h' : F.ones((5, 2))})
g.add_nodes(5, {"h": F.ones((5, 2))})
ans = F.cat([F.zeros((5, 2)), F.ones((5, 2))], 0)
assert F.allclose(ans, g.ndata['h'])
g.ndata['w'] = 2 * F.ones((10, 2))
assert F.allclose(2 * F.ones((10, 2)), g.ndata['w'])
assert F.allclose(ans, g.ndata["h"])
g.ndata["w"] = 2 * F.ones((10, 2))
assert F.allclose(2 * F.ones((10, 2)), g.ndata["w"])
# test add edges with data
g.add_edges([2, 3], [3, 4])
g.add_edges([0, 1], [1, 2], {'m' : F.ones((2, 2))})
g.add_edges([0, 1], [1, 2], {"m": F.ones((2, 2))})
ans = F.cat([F.zeros((2, 2)), F.ones((2, 2))], 0)
assert F.allclose(ans, g.edata['m'])
assert F.allclose(ans, g.edata["m"])
def test_scipy_adjmat():
g = dgl.DGLGraph()
g.add_nodes(10)
g.add_edges(range(9), range(1, 10))
adj_0 = g.adj(scipy_fmt='csr')
adj_1 = g.adj(scipy_fmt='coo')
adj_0 = g.adj(scipy_fmt="csr")
adj_1 = g.adj(scipy_fmt="coo")
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
adj_t0 = g.adj(transpose=False, scipy_fmt='csr')
adj_t_1 = g.adj(transpose=False, scipy_fmt='coo')
adj_t0 = g.adj(transpose=False, scipy_fmt="csr")
adj_t_1 = g.adj(transpose=False, scipy_fmt="coo")
assert np.array_equal(adj_0.toarray(), adj_1.toarray())
def test_incmat():
g = dgl.DGLGraph()
g.add_nodes(4)
g.add_edges(0, 1) # 0
g.add_edges(0, 2) # 1
g.add_edges(0, 3) # 2
g.add_edges(2, 3) # 3
g.add_edges(1, 1) # 4
inc_in = F.sparse_to_numpy(g.incidence_matrix('in'))
inc_out = F.sparse_to_numpy(g.incidence_matrix('out'))
inc_both = F.sparse_to_numpy(g.incidence_matrix('both'))
g.add_edges(0, 1) # 0
g.add_edges(0, 2) # 1
g.add_edges(0, 3) # 2
g.add_edges(2, 3) # 3
g.add_edges(1, 1) # 4
inc_in = F.sparse_to_numpy(g.incidence_matrix("in"))
inc_out = F.sparse_to_numpy(g.incidence_matrix("out"))
inc_both = F.sparse_to_numpy(g.incidence_matrix("both"))
print(inc_in)
print(inc_out)
print(inc_both)
assert np.allclose(
inc_in,
np.array([[0., 0., 0., 0., 0.],
[1., 0., 0., 0., 1.],
[0., 1., 0., 0., 0.],
[0., 0., 1., 1., 0.]]))
inc_in,
np.array(
[
[0.0, 0.0, 0.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 0.0, 1.0],
[0.0, 1.0, 0.0, 0.0, 0.0],
[0.0, 0.0, 1.0, 1.0, 0.0],
]
),
)
assert np.allclose(
inc_out,
np.array([[1., 1., 1., 0., 0.],
[0., 0., 0., 0., 1.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 0.]]))
inc_out,
np.array(
[
[1.0, 1.0, 1.0, 0.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 1.0],
[0.0, 0.0, 0.0, 1.0, 0.0],
[0.0, 0.0, 0.0, 0.0, 0.0],
]
),
)
assert np.allclose(
inc_both,
np.array([[-1., -1., -1., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 1., 0., -1., 0.],
[0., 0., 1., 1., 0.]]))
inc_both,
np.array(
[
[-1.0, -1.0, -1.0, 0.0, 0.0],
[1.0, 0.0, 0.0, 0.0, 0.0],
[0.0, 1.0, 0.0, -1.0, 0.0],
[0.0, 0.0, 1.0, 1.0, 0.0],
]
),
)
def test_find_edges():
g = dgl.DGLGraph()
g.add_nodes(10)
g.add_edges(range(9), range(1, 10))
e = g.find_edges([1, 3, 2, 4])
assert F.asnumpy(e[0][0]) == 1 and F.asnumpy(e[0][1]) == 3 and F.asnumpy(e[0][2]) == 2 and F.asnumpy(e[0][3]) == 4
assert F.asnumpy(e[1][0]) == 2 and F.asnumpy(e[1][1]) == 4 and F.asnumpy(e[1][2]) == 3 and F.asnumpy(e[1][3]) == 5
assert (
F.asnumpy(e[0][0]) == 1
and F.asnumpy(e[0][1]) == 3
and F.asnumpy(e[0][2]) == 2
and F.asnumpy(e[0][3]) == 4
)
assert (
F.asnumpy(e[1][0]) == 2
and F.asnumpy(e[1][1]) == 4
and F.asnumpy(e[1][2]) == 3
and F.asnumpy(e[1][3]) == 5
)
try:
g.find_edges([10])
......@@ -302,6 +392,7 @@ def test_find_edges():
finally:
assert fail
def test_ismultigraph():
g = dgl.DGLGraph()
g.add_nodes(10)
......@@ -313,6 +404,7 @@ def test_ismultigraph():
g.add_edges([0, 2], [0, 3])
assert g.is_multigraph == True
def test_hypersparse_query():
g = dgl.DGLGraph()
g = g.to(F.ctx())
......@@ -323,14 +415,15 @@ def test_hypersparse_query():
assert not g.has_nodes(1000002)
assert g.edge_ids(0, 1) == 0
src, dst = g.find_edges([0])
src, dst, eid = g.in_edges(1, form='all')
src, dst, eid = g.out_edges(0, form='all')
src, dst, eid = g.in_edges(1, form="all")
src, dst, eid = g.out_edges(0, form="all")
src, dst = g.edges()
assert g.in_degrees(0) == 0
assert g.in_degrees(1) == 1
assert g.out_degrees(0) == 1
assert g.out_degrees(1) == 0
def test_empty_data_initialized():
g = dgl.DGLGraph()
g = g.to(F.ctx())
......@@ -339,30 +432,31 @@ def test_empty_data_initialized():
assert "ha" in g.ndata
assert len(g.ndata["ha"]) == 1
def test_is_sorted():
u_src, u_dst = edge_pair_input(False)
s_src, s_dst = edge_pair_input(True)
u_src, u_dst = edge_pair_input(False)
s_src, s_dst = edge_pair_input(True)
u_src = F.tensor(u_src, dtype=F.int32)
u_dst = F.tensor(u_dst, dtype=F.int32)
s_src = F.tensor(s_src, dtype=F.int32)
s_dst = F.tensor(s_dst, dtype=F.int32)
u_src = F.tensor(u_src, dtype=F.int32)
u_dst = F.tensor(u_dst, dtype=F.int32)
s_src = F.tensor(s_src, dtype=F.int32)
s_dst = F.tensor(s_dst, dtype=F.int32)
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, s_dst)
assert src_sorted == True
assert dst_sorted == True
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, s_dst)
assert src_sorted == True
assert dst_sorted == True
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(u_src, u_dst)
assert src_sorted == False
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, u_dst)
assert src_sorted == True
assert dst_sorted == False
src_sorted, dst_sorted = dgl.utils.is_sorted_srcdst(s_src, u_dst)
assert src_sorted == True
assert dst_sorted == False
def test_default_types():
......@@ -379,10 +473,10 @@ def test_formats():
try:
g.in_degrees()
g.out_degrees()
g.formats('coo').in_degrees()
g.formats('coo').out_degrees()
g.formats('csc').in_degrees()
g.formats('csr').out_degrees()
g.formats("coo").in_degrees()
g.formats("coo").out_degrees()
g.formats("csc").in_degrees()
g.formats("csr").out_degrees()
fail = False
except DGLError:
fail = True
......@@ -390,7 +484,7 @@ def test_formats():
assert not fail
# in_degrees NOT works if csc available only
try:
g.formats('csc').out_degrees()
g.formats("csc").out_degrees()
fail = True
except DGLError:
fail = False
......@@ -398,14 +492,15 @@ def test_formats():
assert not fail
# out_degrees NOT works if csr available only
try:
g.formats('csr').in_degrees()
g.formats("csr").in_degrees()
fail = True
except DGLError:
fail = False
finally:
assert not fail
if __name__ == '__main__':
if __name__ == "__main__":
test_query()
test_mutation()
test_scipy_adjmat()
......
import io
import pickle
import unittest
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import pytest
import scipy.sparse as ssp
import dgl
import test_utils
from dgl.graph_index import create_graph_index
from dgl.utils import toindex
import backend as F
import dgl.function as fn
import pickle
import io
import unittest, pytest
import test_utils
from test_utils import parametrize_idtype, get_cases
from test_utils import get_cases, parametrize_idtype
from utils import assert_is_identical, assert_is_identical_hetero
def _assert_is_identical_nodeflow(nf1, nf2):
assert nf1.number_of_nodes() == nf2.number_of_nodes()
src, dst = nf1.all_edges()
......@@ -32,23 +36,29 @@ def _assert_is_identical_nodeflow(nf1, nf2):
for k in nf1.blocks[i].data:
assert F.allclose(nf1.blocks[i].data[k], nf2.blocks[i].data[k])
def _assert_is_identical_batchedgraph(bg1, bg2):
assert_is_identical(bg1, bg2)
assert bg1.batch_size == bg2.batch_size
assert bg1.batch_num_nodes == bg2.batch_num_nodes
assert bg1.batch_num_edges == bg2.batch_num_edges
def _assert_is_identical_batchedhetero(bg1, bg2):
assert_is_identical_hetero(bg1, bg2)
for ntype in bg1.ntypes:
assert bg1.batch_num_nodes(ntype) == bg2.batch_num_nodes(ntype)
for canonical_etype in bg1.canonical_etypes:
assert bg1.batch_num_edges(canonical_etype) == bg2.batch_num_edges(canonical_etype)
assert bg1.batch_num_edges(canonical_etype) == bg2.batch_num_edges(
canonical_etype
)
def _assert_is_identical_index(i1, i2):
assert i1.slice_data() == i2.slice_data()
assert F.array_equal(i1.tousertensor(), i2.tousertensor())
def _reconstruct_pickle(obj):
f = io.BytesIO()
pickle.dump(obj, f)
......@@ -58,11 +68,12 @@ def _reconstruct_pickle(obj):
return obj
def test_pickling_index():
# normal index
i = toindex([1, 2, 3])
i.tousertensor()
i.todgltensor() # construct a dgl tensor which is unpicklable
i.todgltensor() # construct a dgl tensor which is unpicklable
i2 = _reconstruct_pickle(i)
_assert_is_identical_index(i, i2)
......@@ -71,6 +82,7 @@ def test_pickling_index():
i2 = _reconstruct_pickle(i)
_assert_is_identical_index(i, i2)
def test_pickling_graph_index():
gi = create_graph_index(None, False)
gi.add_nodes(3)
......@@ -87,53 +99,65 @@ def test_pickling_graph_index():
def _global_message_func(nodes):
return {'x': nodes.data['x']}
return {"x": nodes.data["x"]}
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(exclude=['dglgraph', 'two_hetero_batch']))
@pytest.mark.parametrize(
"g", get_cases(exclude=["dglgraph", "two_hetero_batch"])
)
def test_pickling_graph(g, idtype):
g = g.astype(idtype)
new_g = _reconstruct_pickle(g)
test_utils.check_graph_equal(g, new_g, check_feature=True)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU not implemented")
@unittest.skipIf(F._default_context_str == "gpu", reason="GPU not implemented")
def test_pickling_batched_heterograph():
# copied from test_heterograph.create_test_heterograph()
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
})
g2 = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
})
g.nodes['user'].data['u_h'] = F.randn((3, 4))
g.nodes['game'].data['g_h'] = F.randn((2, 5))
g.edges['plays'].data['p_h'] = F.randn((4, 6))
g2.nodes['user'].data['u_h'] = F.randn((3, 4))
g2.nodes['game'].data['g_h'] = F.randn((2, 5))
g2.edges['plays'].data['p_h'] = F.randn((4, 6))
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
}
)
g2 = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
}
)
g.nodes["user"].data["u_h"] = F.randn((3, 4))
g.nodes["game"].data["g_h"] = F.randn((2, 5))
g.edges["plays"].data["p_h"] = F.randn((4, 6))
g2.nodes["user"].data["u_h"] = F.randn((3, 4))
g2.nodes["game"].data["g_h"] = F.randn((2, 5))
g2.edges["plays"].data["p_h"] = F.randn((4, 6))
bg = dgl.batch([g, g2])
new_bg = _reconstruct_pickle(bg)
test_utils.check_graph_equal(bg, new_bg)
@unittest.skipIf(F._default_context_str == 'gpu', reason="GPU edge_subgraph w/ relabeling not implemented")
@unittest.skipIf(
F._default_context_str == "gpu",
reason="GPU edge_subgraph w/ relabeling not implemented",
)
def test_pickling_subgraph():
f1 = io.BytesIO()
f2 = io.BytesIO()
g = dgl.rand_graph(10000, 100000)
g.ndata['x'] = F.randn((10000, 4))
g.edata['x'] = F.randn((100000, 5))
g.ndata["x"] = F.randn((10000, 4))
g.edata["x"] = F.randn((100000, 5))
pickle.dump(g, f1)
sg = g.subgraph([0, 1])
sgx = sg.ndata['x'] # materialize
sgx = sg.ndata["x"] # materialize
pickle.dump(sg, f2)
# TODO(BarclayII): How should I test that the size of the subgraph pickle file should not
# be as large as the size of the original pickle file?
......@@ -141,38 +165,47 @@ def test_pickling_subgraph():
f2.seek(0)
f2.truncate()
sgx = sg.edata['x'] # materialize
sgx = sg.edata["x"] # materialize
pickle.dump(sg, f2)
assert f1.tell() > f2.tell() * 50
f2.seek(0)
f2.truncate()
sg = g.edge_subgraph([0])
sgx = sg.edata['x'] # materialize
sgx = sg.edata["x"] # materialize
pickle.dump(sg, f2)
assert f1.tell() > f2.tell() * 50
f2.seek(0)
f2.truncate()
sgx = sg.ndata['x'] # materialize
sgx = sg.ndata["x"] # materialize
pickle.dump(sg, f2)
assert f1.tell() > f2.tell() * 50
f1.close()
f2.close()
@unittest.skipIf(F._default_context_str != 'gpu', reason="Need GPU for pin")
@unittest.skipIf(dgl.backend.backend_name == "tensorflow", reason="TensorFlow create graph on gpu when unpickle")
@unittest.skipIf(F._default_context_str != "gpu", reason="Need GPU for pin")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="TensorFlow create graph on gpu when unpickle",
)
@parametrize_idtype
def test_pickling_is_pinned(idtype):
from copy import deepcopy
g = dgl.rand_graph(10, 20, idtype=idtype, device=F.cpu())
hg = dgl.heterograph({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
}, idtype=idtype, device=F.cpu())
hg = dgl.heterograph(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
},
idtype=idtype,
device=F.cpu(),
)
for graph in [g, hg]:
assert not graph.is_pinned()
graph.pin_memory_()
......@@ -186,7 +219,7 @@ def test_pickling_is_pinned(idtype):
graph.unpin_memory_()
if __name__ == '__main__':
if __name__ == "__main__":
test_pickling_index()
test_pickling_graph_index()
test_pickling_frame()
......
import backend as F
import numpy as np
from test_utils import parametrize_idtype
import dgl
import numpy as np
from test_utils import parametrize_idtype
@parametrize_idtype
......
import io
import multiprocessing as mp
import os
import pickle
import unittest
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import scipy.sparse as ssp
import dgl
from dgl.graph_index import create_graph_index
from dgl.utils import toindex
import backend as F
import dgl.function as fn
import pickle
import io
import unittest
from test_utils import parametrize_idtype
import multiprocessing as mp
import os
def create_test_graph(idtype):
g = dgl.heterograph(({
('user', 'follows', 'user'): ([0, 1], [1, 2]),
('user', 'plays', 'game'): ([0, 1, 2, 1], [0, 0, 1, 1]),
('user', 'wishes', 'game'): ([0, 2], [1, 0]),
('developer', 'develops', 'game'): ([0, 1], [0, 1])
}), idtype=idtype)
g = dgl.heterograph(
(
{
("user", "follows", "user"): ([0, 1], [1, 2]),
("user", "plays", "game"): ([0, 1, 2, 1], [0, 0, 1, 1]),
("user", "wishes", "game"): ([0, 2], [1, 0]),
("developer", "develops", "game"): ([0, 1], [0, 1]),
}
),
idtype=idtype,
)
return g
def _assert_is_identical_hetero(g, g2):
assert g.ntypes == g2.ntypes
assert g.canonical_etypes == g2.canonical_etypes
......@@ -35,29 +44,38 @@ def _assert_is_identical_hetero(g, g2):
# check if edge ID spaces and feature spaces are equal
for etype in g.canonical_etypes:
src, dst = g.all_edges(etype=etype, order='eid')
src2, dst2 = g2.all_edges(etype=etype, order='eid')
src, dst = g.all_edges(etype=etype, order="eid")
src2, dst2 = g2.all_edges(etype=etype, order="eid")
assert F.array_equal(src, src2)
assert F.array_equal(dst, dst2)
@unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now')
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="Not support tensorflow for now",
)
@parametrize_idtype
def test_single_process(idtype):
hg = create_test_graph(idtype=idtype)
hg_share = hg.shared_memory("hg")
hg_rebuild = dgl.hetero_from_shared_memory('hg')
hg_rebuild = dgl.hetero_from_shared_memory("hg")
hg_save_again = hg_rebuild.shared_memory("hg")
_assert_is_identical_hetero(hg, hg_share)
_assert_is_identical_hetero(hg, hg_rebuild)
_assert_is_identical_hetero(hg, hg_save_again)
def sub_proc(hg_origin, name):
hg_rebuild = dgl.hetero_from_shared_memory(name)
hg_save_again = hg_rebuild.shared_memory(name)
_assert_is_identical_hetero(hg_origin, hg_rebuild)
_assert_is_identical_hetero(hg_origin, hg_save_again)
@unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now')
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="Not support tensorflow for now",
)
@parametrize_idtype
def test_multi_process(idtype):
hg = create_test_graph(idtype=idtype)
......@@ -66,8 +84,14 @@ def test_multi_process(idtype):
p.start()
p.join()
@unittest.skipIf(F._default_context_str == 'cpu', reason="Need gpu for this test")
@unittest.skipIf(dgl.backend.backend_name == 'tensorflow', reason='Not support tensorflow for now')
@unittest.skipIf(
F._default_context_str == "cpu", reason="Need gpu for this test"
)
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow",
reason="Not support tensorflow for now",
)
def test_copy_from_gpu():
hg = create_test_graph(idtype=F.int32)
hg_gpu = hg.to(F.cuda())
......@@ -76,6 +100,7 @@ def test_copy_from_gpu():
p.start()
p.join()
# TODO: Test calling shared_memory with Blocks (a subclass of HeteroGraph)
if __name__ == "__main__":
test_single_process(F.int64)
......
import numpy as np
import scipy.sparse as sp
import backend as F
import dgl
import dgl.function as fn
import backend as F
import numpy as np
import scipy.sparse as sp
from test_utils import parametrize_idtype
D = 5
def generate_graph(idtype):
g = dgl.DGLGraph()
g = g.astype(idtype).to(F.ctx())
......@@ -17,117 +19,142 @@ def generate_graph(idtype):
g.add_edges(i, 9)
# add a back flow from 9 to 0
g.add_edges(9, 0)
g.ndata.update({'f1' : F.randn((10,)), 'f2' : F.randn((10, D))})
g.ndata.update({"f1": F.randn((10,)), "f2": F.randn((10, D))})
weights = F.randn((17,))
g.edata.update({'e1': weights, 'e2': F.unsqueeze(weights, 1)})
g.edata.update({"e1": weights, "e2": F.unsqueeze(weights, 1)})
return g
@parametrize_idtype
def test_v2v_update_all(idtype):
def _test(fld):
def message_func(edges):
return {'m' : edges.src[fld]}
return {"m": edges.src[fld]}
def message_func_edge(edges):
if len(edges.src[fld].shape) == 1:
return {'m' : edges.src[fld] * edges.data['e1']}
return {"m": edges.src[fld] * edges.data["e1"]}
else:
return {'m' : edges.src[fld] * edges.data['e2']}
return {"m": edges.src[fld] * edges.data["e2"]}
def reduce_func(nodes):
return {fld : F.sum(nodes.mailbox['m'], 1)}
return {fld: F.sum(nodes.mailbox["m"], 1)}
def apply_func(nodes):
return {fld : 2 * nodes.data[fld]}
return {fld: 2 * nodes.data[fld]}
g = generate_graph(idtype)
# update all
v1 = g.ndata[fld]
g.update_all(fn.copy_u(u=fld, out='m'), fn.sum(msg='m', out=fld), apply_func)
g.update_all(
fn.copy_u(u=fld, out="m"), fn.sum(msg="m", out=fld), apply_func
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.update_all(message_func, reduce_func, apply_func)
v3 = g.ndata[fld]
assert F.allclose(v2, v3)
# update all with edge weights
v1 = g.ndata[fld]
g.update_all(fn.u_mul_e(fld, 'e1', 'm'),
fn.sum(msg='m', out=fld), apply_func)
g.update_all(
fn.u_mul_e(fld, "e1", "m"), fn.sum(msg="m", out=fld), apply_func
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.update_all(message_func_edge, reduce_func, apply_func)
v4 = g.ndata[fld]
assert F.allclose(v2, v4)
# test 1d node features
_test('f1')
_test("f1")
# test 2d node features
_test('f2')
_test("f2")
@parametrize_idtype
def test_v2v_snr(idtype):
u = F.tensor([0, 0, 0, 3, 4, 9], idtype)
v = F.tensor([1, 2, 3, 9, 9, 0], idtype)
def _test(fld):
def message_func(edges):
return {'m' : edges.src[fld]}
return {"m": edges.src[fld]}
def message_func_edge(edges):
if len(edges.src[fld].shape) == 1:
return {'m' : edges.src[fld] * edges.data['e1']}
return {"m": edges.src[fld] * edges.data["e1"]}
else:
return {'m' : edges.src[fld] * edges.data['e2']}
return {"m": edges.src[fld] * edges.data["e2"]}
def reduce_func(nodes):
return {fld : F.sum(nodes.mailbox['m'], 1)}
return {fld: F.sum(nodes.mailbox["m"], 1)}
def apply_func(nodes):
return {fld : 2 * nodes.data[fld]}
return {fld: 2 * nodes.data[fld]}
g = generate_graph(idtype)
# send and recv
v1 = g.ndata[fld]
g.send_and_recv((u, v), fn.copy_u(u=fld, out='m'),
fn.sum(msg='m', out=fld), apply_func)
g.send_and_recv(
(u, v),
fn.copy_u(u=fld, out="m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.send_and_recv((u, v), message_func, reduce_func, apply_func)
v3 = g.ndata[fld]
assert F.allclose(v2, v3)
# send and recv with edge weights
v1 = g.ndata[fld]
g.send_and_recv((u, v), fn.u_mul_e(fld, 'e1', 'm'),
fn.sum(msg='m', out=fld), apply_func)
g.send_and_recv(
(u, v),
fn.u_mul_e(fld, "e1", "m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata.update({fld : v1})
g.ndata.update({fld: v1})
g.send_and_recv((u, v), message_func_edge, reduce_func, apply_func)
v4 = g.ndata[fld]
assert F.allclose(v2, v4)
# test 1d node features
_test('f1')
_test("f1")
# test 2d node features
_test('f2')
_test("f2")
@parametrize_idtype
def test_v2v_pull(idtype):
nodes = F.tensor([1, 2, 3, 9], idtype)
def _test(fld):
def message_func(edges):
return {'m' : edges.src[fld]}
return {"m": edges.src[fld]}
def message_func_edge(edges):
if len(edges.src[fld].shape) == 1:
return {'m' : edges.src[fld] * edges.data['e1']}
return {"m": edges.src[fld] * edges.data["e1"]}
else:
return {'m' : edges.src[fld] * edges.data['e2']}
return {"m": edges.src[fld] * edges.data["e2"]}
def reduce_func(nodes):
return {fld : F.sum(nodes.mailbox['m'], 1)}
return {fld: F.sum(nodes.mailbox["m"], 1)}
def apply_func(nodes):
return {fld : 2 * nodes.data[fld]}
return {fld: 2 * nodes.data[fld]}
g = generate_graph(idtype)
# send and recv
v1 = g.ndata[fld]
g.pull(nodes, fn.copy_u(u=fld, out='m'), fn.sum(msg='m', out=fld), apply_func)
g.pull(
nodes,
fn.copy_u(u=fld, out="m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata[fld] = v1
g.pull(nodes, message_func, reduce_func, apply_func)
......@@ -135,17 +162,23 @@ def test_v2v_pull(idtype):
assert F.allclose(v2, v3)
# send and recv with edge weights
v1 = g.ndata[fld]
g.pull(nodes, fn.u_mul_e(fld, 'e1', 'm'),
fn.sum(msg='m', out=fld), apply_func)
g.pull(
nodes,
fn.u_mul_e(fld, "e1", "m"),
fn.sum(msg="m", out=fld),
apply_func,
)
v2 = g.ndata[fld]
g.ndata[fld] = v1
g.pull(nodes, message_func_edge, reduce_func, apply_func)
v4 = g.ndata[fld]
assert F.allclose(v2, v4)
# test 1d node features
_test('f1')
_test("f1")
# test 2d node features
_test('f2')
_test("f2")
@parametrize_idtype
def test_update_all_multi_fallback(idtype):
......@@ -156,42 +189,50 @@ def test_update_all_multi_fallback(idtype):
for i in range(1, 9):
g.add_edges(0, i)
g.add_edges(i, 9)
g.ndata['h'] = F.randn((10, D))
g.edata['w1'] = F.randn((16,))
g.edata['w2'] = F.randn((16, D))
g.ndata["h"] = F.randn((10, D))
g.edata["w1"] = F.randn((16,))
g.edata["w2"] = F.randn((16, D))
def _mfunc_hxw1(edges):
return {'m1' : edges.src['h'] * F.unsqueeze(edges.data['w1'], 1)}
return {"m1": edges.src["h"] * F.unsqueeze(edges.data["w1"], 1)}
def _mfunc_hxw2(edges):
return {'m2' : edges.src['h'] * edges.data['w2']}
return {"m2": edges.src["h"] * edges.data["w2"]}
def _rfunc_m1(nodes):
return {'o1' : F.sum(nodes.mailbox['m1'], 1)}
return {"o1": F.sum(nodes.mailbox["m1"], 1)}
def _rfunc_m2(nodes):
return {'o2' : F.sum(nodes.mailbox['m2'], 1)}
return {"o2": F.sum(nodes.mailbox["m2"], 1)}
def _rfunc_m1max(nodes):
return {'o3' : F.max(nodes.mailbox['m1'], 1)}
return {"o3": F.max(nodes.mailbox["m1"], 1)}
def _afunc(nodes):
ret = {}
for k, v in nodes.data.items():
if k.startswith('o'):
if k.startswith("o"):
ret[k] = 2 * v
return ret
# compute ground truth
g.update_all(_mfunc_hxw1, _rfunc_m1, _afunc)
o1 = g.ndata.pop('o1')
o1 = g.ndata.pop("o1")
g.update_all(_mfunc_hxw2, _rfunc_m2, _afunc)
o2 = g.ndata.pop('o2')
o2 = g.ndata.pop("o2")
g.update_all(_mfunc_hxw1, _rfunc_m1max, _afunc)
o3 = g.ndata.pop('o3')
o3 = g.ndata.pop("o3")
# v2v spmv
g.update_all(fn.u_mul_e('h', 'w1', 'm1'),
fn.sum(msg='m1', out='o1'),
_afunc)
assert F.allclose(o1, g.ndata.pop('o1'))
g.update_all(
fn.u_mul_e("h", "w1", "m1"), fn.sum(msg="m1", out="o1"), _afunc
)
assert F.allclose(o1, g.ndata.pop("o1"))
# v2v fallback to e2v
g.update_all(fn.u_mul_e('h', 'w2', 'm2'),
fn.sum(msg='m2', out='o2'),
_afunc)
assert F.allclose(o2, g.ndata.pop('o2'))
g.update_all(
fn.u_mul_e("h", "w2", "m2"), fn.sum(msg="m2", out="o2"), _afunc
)
assert F.allclose(o2, g.ndata.pop("o2"))
@parametrize_idtype
def test_pull_multi_fallback(idtype):
......@@ -202,44 +243,58 @@ def test_pull_multi_fallback(idtype):
for i in range(1, 9):
g.add_edges(0, i)
g.add_edges(i, 9)
g.ndata['h'] = F.randn((10, D))
g.edata['w1'] = F.randn((16,))
g.edata['w2'] = F.randn((16, D))
g.ndata["h"] = F.randn((10, D))
g.edata["w1"] = F.randn((16,))
g.edata["w2"] = F.randn((16, D))
def _mfunc_hxw1(edges):
return {'m1' : edges.src['h'] * F.unsqueeze(edges.data['w1'], 1)}
return {"m1": edges.src["h"] * F.unsqueeze(edges.data["w1"], 1)}
def _mfunc_hxw2(edges):
return {'m2' : edges.src['h'] * edges.data['w2']}
return {"m2": edges.src["h"] * edges.data["w2"]}
def _rfunc_m1(nodes):
return {'o1' : F.sum(nodes.mailbox['m1'], 1)}
return {"o1": F.sum(nodes.mailbox["m1"], 1)}
def _rfunc_m2(nodes):
return {'o2' : F.sum(nodes.mailbox['m2'], 1)}
return {"o2": F.sum(nodes.mailbox["m2"], 1)}
def _rfunc_m1max(nodes):
return {'o3' : F.max(nodes.mailbox['m1'], 1)}
return {"o3": F.max(nodes.mailbox["m1"], 1)}
def _afunc(nodes):
ret = {}
for k, v in nodes.data.items():
if k.startswith('o'):
if k.startswith("o"):
ret[k] = 2 * v
return ret
# nodes to pull
def _pull_nodes(nodes):
# compute ground truth
g.pull(nodes, _mfunc_hxw1, _rfunc_m1, _afunc)
o1 = g.ndata.pop('o1')
o1 = g.ndata.pop("o1")
g.pull(nodes, _mfunc_hxw2, _rfunc_m2, _afunc)
o2 = g.ndata.pop('o2')
o2 = g.ndata.pop("o2")
g.pull(nodes, _mfunc_hxw1, _rfunc_m1max, _afunc)
o3 = g.ndata.pop('o3')
o3 = g.ndata.pop("o3")
# v2v spmv
g.pull(nodes, fn.u_mul_e('h', 'w1', 'm1'),
fn.sum(msg='m1', out='o1'),
_afunc)
assert F.allclose(o1, g.ndata.pop('o1'))
g.pull(
nodes,
fn.u_mul_e("h", "w1", "m1"),
fn.sum(msg="m1", out="o1"),
_afunc,
)
assert F.allclose(o1, g.ndata.pop("o1"))
# v2v fallback to e2v
g.pull(nodes, fn.u_mul_e('h', 'w2', 'm2'),
fn.sum(msg='m2', out='o2'),
_afunc)
assert F.allclose(o2, g.ndata.pop('o2'))
g.pull(
nodes,
fn.u_mul_e("h", "w2", "m2"),
fn.sum(msg="m2", out="o2"),
_afunc,
)
assert F.allclose(o2, g.ndata.pop("o2"))
# test#1: non-0deg nodes
nodes = [1, 2, 9]
_pull_nodes(nodes)
......@@ -247,13 +302,17 @@ def test_pull_multi_fallback(idtype):
nodes = [0, 1, 2, 9]
_pull_nodes(nodes)
@parametrize_idtype
def test_spmv_3d_feat(idtype):
def src_mul_edge_udf(edges):
return {'sum': edges.src['h'] * F.unsqueeze(F.unsqueeze(edges.data['h'], 1), 1)}
return {
"sum": edges.src["h"]
* F.unsqueeze(F.unsqueeze(edges.data["h"], 1), 1)
}
def sum_udf(nodes):
return {'h': F.sum(nodes.mailbox['sum'], 1)}
return {"h": F.sum(nodes.mailbox["sum"], 1)}
n = 100
p = 0.1
......@@ -266,44 +325,53 @@ def test_spmv_3d_feat(idtype):
h = F.randn((n, 5, 5))
e = F.randn((m,))
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=fn.u_mul_e('h', 'h', 'sum'), reduce_func=fn.sum('sum', 'h')) # 1
ans = g.ndata['h']
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=fn.u_mul_e("h", "h", "sum"), reduce_func=fn.sum("sum", "h")
) # 1
ans = g.ndata["h"]
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=fn.sum('sum', 'h')) # 2
assert F.allclose(g.ndata['h'], ans)
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=src_mul_edge_udf, reduce_func=fn.sum("sum", "h")
) # 2
assert F.allclose(g.ndata["h"], ans)
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata['h'], ans)
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata["h"], ans)
# test#2: e2v
def src_mul_edge_udf(edges):
return {'sum': edges.src['h'] * edges.data['h']}
return {"sum": edges.src["h"] * edges.data["h"]}
h = F.randn((n, 5, 5))
e = F.randn((m, 5, 5))
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=fn.u_mul_e('h', 'h', 'sum'), reduce_func=fn.sum('sum', 'h')) # 1
ans = g.ndata['h']
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=fn.u_mul_e("h", "h", "sum"), reduce_func=fn.sum("sum", "h")
) # 1
ans = g.ndata["h"]
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(
message_func=src_mul_edge_udf, reduce_func=fn.sum("sum", "h")
) # 2
assert F.allclose(g.ndata["h"], ans)
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=fn.sum('sum', 'h')) # 2
assert F.allclose(g.ndata['h'], ans)
g.ndata["h"] = h
g.edata["h"] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata["h"], ans)
g.ndata['h'] = h
g.edata['h'] = e
g.update_all(message_func=src_mul_edge_udf, reduce_func=sum_udf) # 3
assert F.allclose(g.ndata['h'], ans)
if __name__ == '__main__':
if __name__ == "__main__":
test_v2v_update_all()
test_v2v_snr()
test_v2v_pull()
......
......@@ -4,18 +4,18 @@ from collections import Counter
from itertools import product
import backend as F
import dgl
import dgl.function as fn
import networkx as nx
import numpy as np
import pytest
import scipy.sparse as ssp
import test_utils
from dgl import DGLError
from scipy.sparse import rand
from test_utils import get_cases, parametrize_idtype
import dgl
import dgl.function as fn
from dgl import DGLError
rfuncs = {"sum": fn.sum, "max": fn.max, "min": fn.min, "mean": fn.mean}
feat_size = 2
......@@ -48,7 +48,6 @@ def create_test_heterograph(idtype):
def create_test_heterograph_2(idtype):
src = np.random.randint(0, 50, 25)
dst = np.random.randint(0, 50, 25)
src1 = np.random.randint(0, 25, 10)
......@@ -72,7 +71,6 @@ def create_test_heterograph_2(idtype):
def create_test_heterograph_large(idtype):
src = np.random.randint(0, 50, 2500)
dst = np.random.randint(0, 50, 2500)
g = dgl.heterograph(
......@@ -163,7 +161,6 @@ def test_unary_copy_u(idtype):
@parametrize_idtype
def test_unary_copy_e(idtype):
def _test(mfunc, rfunc):
g = create_test_heterograph_large(idtype)
g0 = create_test_heterograph_2(idtype)
g1 = create_test_heterograph(idtype)
......@@ -230,6 +227,7 @@ def test_unary_copy_e(idtype):
e_grad6 = F.grad(g["plays"].edata["eid"])
e_grad7 = F.grad(g["wishes"].edata["eid"])
e_grad8 = F.grad(g["follows"].edata["eid"])
# # correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
......@@ -254,7 +252,6 @@ def test_unary_copy_e(idtype):
@parametrize_idtype
def test_binary_op(idtype):
def _test(lhs, rhs, binary_op, reducer):
g = create_test_heterograph(idtype)
x1 = F.randn((g.num_nodes("user"), feat_size))
......@@ -309,6 +306,7 @@ def test_binary_op(idtype):
r2 = g.nodes["game"].data["y"]
F.backward(r2, F.ones(r2.shape))
n_grad2 = F.grad(r2)
# correctness check
def _print_error(a, b):
for i, (x, y) in enumerate(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment