Unverified Commit 89a4cc4d authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4694)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 303b150f
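
The diff that follows is a mechanical Black pass over several Python test files: string literals normalized to double quotes, long calls and decorators wrapped across lines, and blank-line spacing made consistent, with no behavioral changes. A minimal sketch of reproducing or checking such a pass locally is shown below; the 80-character line length and the tests/ directory glob are assumptions for illustration rather than settings taken from this commit, and in practice running `black .` against the repository's own configuration is the usual route.

import pathlib

import black

# Assumed formatting settings; the project's real Black configuration may differ.
MODE = black.FileMode(line_length=80)


def reformat(path: pathlib.Path) -> bool:
    """Rewrite `path` in place with Black and report whether it changed."""
    src = path.read_text()
    dst = black.format_str(src, mode=MODE)
    if dst == src:
        return False
    path.write_text(dst)
    return True


if __name__ == "__main__":
    # Hypothetical target: the test tree touched by this commit.
    changed = [p for p in pathlib.Path("tests").rglob("*.py") if reformat(p)]
    print("reformatted", len(changed), "files")

Dropping the write_text call turns this into a dry-run check, equivalent in spirit to black --check.
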
import backend as F
import mxnet as mx
import numpy as np
from dgl.geometry import farthest_point_sampler
import backend as F
import numpy as np
def test_fps():
N = 1000
batch_size = 5
sample_points = 10
x = mx.nd.array(np.random.uniform(size=(batch_size, int(N/batch_size), 3)))
x = mx.nd.array(
np.random.uniform(size=(batch_size, int(N / batch_size), 3))
)
ctx = F.ctx()
if F.gpu_ctx():
x = x.as_in_context(ctx)
@@ -17,5 +20,6 @@ def test_fps():
assert res.shape[1] == sample_points
assert res.sum() > 0
if __name__ == '__main__':
if __name__ == "__main__":
test_fps()
import backend as F
import mxnet as mx
import networkx as nx
import numpy as np
import scipy as sp
import pytest
import scipy as sp
from mxnet import autograd, gluon, nd
from test_utils import parametrize_idtype
from test_utils.graph_cases import (
get_cases,
random_bipartite,
random_dglgraph,
random_graph,
)
import dgl
import dgl.nn.mxnet as nn
import dgl.function as fn
import backend as F
from test_utils.graph_cases import get_cases, random_graph, random_bipartite, random_dglgraph
from test_utils import parametrize_idtype
from mxnet import autograd, gluon, nd
import dgl.nn.mxnet as nn
def check_close(a, b):
assert np.allclose(a.asnumpy(), b.asnumpy(), rtol=1e-4, atol=1e-4)
def _AXWb(A, X, W, b):
X = mx.nd.dot(X, W.data(X.context))
Y = mx.nd.dot(A, X.reshape(X.shape[0], -1)).reshape(X.shape)
return Y + b.data(X.context)
@parametrize_idtype
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv(idtype, out_dim):
g = dgl.from_networkx(nx.path_graph(3))
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv = nn.GraphConv(5, out_dim, norm="none", bias=True)
conv.initialize(ctx=ctx)
# test#1: basic
h0 = F.ones((3, 5))
@@ -77,14 +86,18 @@ def test_graph_conv(idtype, out_dim):
assert len(g.ndata) == 1
assert len(g.edata) == 0
assert "h" in g.ndata
check_close(g.ndata['h'], 2 * F.ones((3, 1)))
check_close(g.ndata["h"], 2 * F.ones((3, 1)))
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree', 'dglgraph']))
@pytest.mark.parametrize('norm', ['none', 'both', 'right', 'left'])
@pytest.mark.parametrize('weight', [True, False])
@pytest.mark.parametrize('bias', [False])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g",
get_cases(["homo", "block-bipartite"], exclude=["zero-degree", "dglgraph"]),
)
@pytest.mark.parametrize("norm", ["none", "both", "right", "left"])
@pytest.mark.parametrize("weight", [True, False])
@pytest.mark.parametrize("bias", [False])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv2(idtype, g, norm, weight, bias, out_dim):
g = g.astype(idtype).to(F.ctx())
conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias)
@@ -99,12 +112,15 @@ def test_graph_conv2(idtype, g, norm, weight, bias, out_dim):
h_out = conv(g, h, ext_w)
assert h_out.shape == (ndst, out_dim)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree', 'dglgraph']))
@pytest.mark.parametrize('norm', ['none', 'both', 'right'])
@pytest.mark.parametrize('weight', [True, False])
@pytest.mark.parametrize('bias', [False])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g", get_cases(["bipartite"], exclude=["zero-degree", "dglgraph"])
)
@pytest.mark.parametrize("norm", ["none", "both", "right"])
@pytest.mark.parametrize("weight", [True, False])
@pytest.mark.parametrize("bias", [False])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv2_bi(idtype, g, norm, weight, bias, out_dim):
g = g.astype(idtype).to(F.ctx())
conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias)
@@ -120,6 +136,7 @@ def test_graph_conv2_bi(idtype, g, norm, weight, bias, out_dim):
h_out = conv(g, (h, h_dst), ext_w)
assert h_out.shape == (ndst, out_dim)
def _S2AXWb(A, N, X, W, b):
X1 = X * N
X1 = mx.nd.dot(A, X1.reshape(X1.shape[0], -1))
@@ -132,12 +149,13 @@ def _S2AXWb(A, N, X, W, b):
return Y + b
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_tagconv(out_dim):
g = dgl.from_networkx(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=True, ctx=ctx)
norm = mx.nd.power(g.in_degrees().astype('float32'), -0.5)
norm = mx.nd.power(g.in_degrees().astype("float32"), -0.5)
conv = nn.TAGConv(5, out_dim, bias=True)
conv.initialize(ctx=ctx)
@@ -151,7 +169,9 @@ def test_tagconv(out_dim):
shp = norm.shape + (1,) * (h0.ndim - 1)
norm = norm.reshape(shp).as_in_context(h0.context)
assert F.allclose(h1, _S2AXWb(adj, norm, h0, conv.lin.data(ctx), conv.h_bias.data(ctx)))
assert F.allclose(
h1, _S2AXWb(adj, norm, h0, conv.lin.data(ctx), conv.h_bias.data(ctx))
)
conv = nn.TAGConv(5, out_dim)
conv.initialize(ctx=ctx)
@@ -161,14 +181,17 @@
h1 = conv(g, h0)
assert h1.shape[-1] == out_dim
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 20])
@pytest.mark.parametrize('num_heads', [1, 5])
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 20])
@pytest.mark.parametrize("num_heads", [1, 5])
def test_gat_conv(g, idtype, out_dim, num_heads):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
gat = nn.GATConv(10, out_dim, num_heads) # n_heads = 5
gat = nn.GATConv(10, out_dim, num_heads) # n_heads = 5
gat.initialize(ctx=ctx)
print(gat)
feat = F.randn((g.number_of_src_nodes(), 10))
@@ -182,25 +205,30 @@ def test_gat_conv(g, idtype, out_dim, num_heads):
gat.initialize(ctx=ctx)
h = gat(g, feat)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize('num_heads', [1, 4])
@pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_gat_conv_bi(g, idtype, out_dim, num_heads):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
gat = nn.GATConv(5, out_dim, num_heads)
gat.initialize(ctx=ctx)
feat = (F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)))
feat = (
F.randn((g.number_of_src_nodes(), 5)),
F.randn((g.number_of_dst_nodes(), 5)),
)
h = gat(g, feat)
assert h.shape == (g.number_of_dst_nodes(), num_heads, out_dim)
_, a = gat(g, feat, True)
assert a.shape == (g.number_of_edges(), num_heads, 1)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite']))
@pytest.mark.parametrize('aggre_type', ['mean', 'pool', 'gcn'])
@pytest.mark.parametrize('out_dim', [1, 10])
@pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"]))
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 10])
def test_sage_conv(idtype, g, aggre_type, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
@@ -210,36 +238,41 @@ def test_sage_conv(idtype, g, aggre_type, out_dim):
h = sage(g, feat)
assert h.shape[-1] == out_dim
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite']))
@pytest.mark.parametrize('aggre_type', ['mean', 'pool', 'gcn'])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("g", get_cases(["bipartite"]))
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sage_conv_bi(idtype, g, aggre_type, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
dst_dim = 5 if aggre_type != 'gcn' else 10
dst_dim = 5 if aggre_type != "gcn" else 10
sage = nn.SAGEConv((10, dst_dim), out_dim, aggre_type)
feat = (F.randn((g.number_of_src_nodes(), 10)), F.randn((g.number_of_dst_nodes(), dst_dim)))
feat = (
F.randn((g.number_of_src_nodes(), 10)),
F.randn((g.number_of_dst_nodes(), dst_dim)),
)
sage.initialize(ctx=ctx)
h = sage(g, feat)
assert h.shape[-1] == out_dim
assert h.shape[0] == g.number_of_dst_nodes()
@parametrize_idtype
@pytest.mark.parametrize('aggre_type', ['mean', 'pool', 'gcn'])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sage_conv_bi2(idtype, aggre_type, out_dim):
# Test the case for graphs without edges
g = dgl.heterograph({('_U', '_E', '_V'): ([], [])}, {'_U': 5, '_V': 3})
g = dgl.heterograph({("_U", "_E", "_V"): ([], [])}, {"_U": 5, "_V": 3})
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
sage = nn.SAGEConv((3, 3), out_dim, 'gcn')
sage = nn.SAGEConv((3, 3), out_dim, "gcn")
feat = (F.randn((5, 3)), F.randn((3, 3)))
sage.initialize(ctx=ctx)
h = sage(g, feat)
assert h.shape[-1] == out_dim
assert h.shape[0] == 3
for aggre_type in ['mean', 'pool']:
for aggre_type in ["mean", "pool"]:
sage = nn.SAGEConv((3, 1), out_dim, aggre_type)
feat = (F.randn((5, 3)), F.randn((3, 1)))
sage.initialize(ctx=ctx)
@@ -247,11 +280,12 @@ def test_sage_conv_bi2(idtype, aggre_type, out_dim):
assert h.shape[-1] == out_dim
assert h.shape[0] == 3
def test_gg_conv():
g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
ctx = F.ctx()
gg_conv = nn.GatedGraphConv(10, 20, 3, 4) # n_step = 3, n_etypes = 4
gg_conv = nn.GatedGraphConv(10, 20, 3, 4) # n_step = 3, n_etypes = 4
gg_conv.initialize(ctx=ctx)
print(gg_conv)
@@ -261,12 +295,13 @@ def test_gg_conv():
h1 = gg_conv(g, h0, etypes)
assert h1.shape == (20, 20)
@pytest.mark.parametrize('out_dim', [1, 20])
@pytest.mark.parametrize("out_dim", [1, 20])
def test_cheb_conv(out_dim):
g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
ctx = F.ctx()
cheb = nn.ChebConv(10, out_dim, 3) # k = 3
cheb = nn.ChebConv(10, out_dim, 3) # k = 3
cheb.initialize(ctx=ctx)
print(cheb)
@@ -275,8 +310,11 @@ def test_cheb_conv(out_dim):
h1 = cheb(g, h0)
assert h1.shape == (20, out_dim)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
def test_agnn_conv(g, idtype):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
@@ -287,18 +325,23 @@ def test_agnn_conv(g, idtype):
h = agnn_conv(g, feat)
assert h.shape == (g.number_of_dst_nodes(), 10)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"]))
def test_agnn_conv_bi(g, idtype):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
agnn_conv = nn.AGNNConv(0.1, True)
agnn_conv.initialize(ctx=ctx)
print(agnn_conv)
feat = (F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)))
feat = (
F.randn((g.number_of_src_nodes(), 5)),
F.randn((g.number_of_dst_nodes(), 5)),
)
h = agnn_conv(g, feat)
assert h.shape == (g.number_of_dst_nodes(), 5)
def test_appnp_conv():
g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
ctx = F.ctx()
@@ -312,69 +355,70 @@ def test_appnp_conv():
h1 = appnp_conv(g, h0)
assert h1.shape == (20, 10)
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_cheb_conv(out_dim):
for k in range(1, 4):
ctx = F.ctx()
g = dgl.from_scipy(sp.sparse.random(100, 100, density=0.3)).to(F.ctx())
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype("default")
cheb = nn.ChebConv(5, out_dim, k)
dense_cheb = nn.DenseChebConv(5, out_dim, k)
cheb.initialize(ctx=ctx)
dense_cheb.initialize(ctx=ctx)
for i in range(len(cheb.fc)):
dense_cheb.fc[i].weight.set_data(
cheb.fc[i].weight.data())
dense_cheb.fc[i].weight.set_data(cheb.fc[i].weight.data())
if cheb.bias is not None:
dense_cheb.bias.set_data(
cheb.bias.data())
dense_cheb.bias.set_data(cheb.bias.data())
feat = F.randn((100, 5))
out_cheb = cheb(g, feat, [2.0])
out_dense_cheb = dense_cheb(adj, feat, 2.0)
assert F.allclose(out_cheb, out_dense_cheb)
@parametrize_idtype
@pytest.mark.parametrize('norm_type', ['both', 'right', 'none'])
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("norm_type", ["both", "right", "none"])
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_graph_conv(idtype, g, norm_type, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype("default")
conv = nn.GraphConv(5, out_dim, norm=norm_type, bias=True)
dense_conv = nn.DenseGraphConv(5, out_dim, norm=norm_type, bias=True)
conv.initialize(ctx=ctx)
dense_conv.initialize(ctx=ctx)
dense_conv.weight.set_data(
conv.weight.data())
dense_conv.bias.set_data(
conv.bias.data())
dense_conv.weight.set_data(conv.weight.data())
dense_conv.bias.set_data(conv.bias.data())
feat = F.randn((g.number_of_src_nodes(), 5))
out_conv = conv(g, feat)
out_dense_conv = dense_conv(adj, feat)
assert F.allclose(out_conv, out_dense_conv)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'bipartite', 'block-bipartite']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g", get_cases(["homo", "bipartite", "block-bipartite"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_sage_conv(idtype, g, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype('default')
sage = nn.SAGEConv(5, out_dim, 'gcn')
adj = g.adjacency_matrix(transpose=True, ctx=ctx).tostype("default")
sage = nn.SAGEConv(5, out_dim, "gcn")
dense_sage = nn.DenseSAGEConv(5, out_dim)
sage.initialize(ctx=ctx)
dense_sage.initialize(ctx=ctx)
dense_sage.fc.weight.set_data(
sage.fc_neigh.weight.data())
dense_sage.fc.bias.set_data(
sage.fc_neigh.bias.data())
dense_sage.fc.weight.set_data(sage.fc_neigh.weight.data())
dense_sage.fc.bias.set_data(sage.fc_neigh.bias.data())
if len(g.ntypes) == 2:
feat = (
F.randn((g.number_of_src_nodes(), 5)),
F.randn((g.number_of_dst_nodes(), 5))
F.randn((g.number_of_dst_nodes(), 5)),
)
else:
feat = F.randn((g.number_of_nodes(), 5))
@@ -383,9 +427,12 @@ def test_dense_sage_conv(idtype, g, out_dim):
out_dense_sage = dense_sage(adj, feat)
assert F.allclose(out_sage, out_dense_sage)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
def test_edge_conv(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
@@ -397,9 +444,10 @@ def test_edge_conv(g, idtype, out_dim):
h1 = edge_conv(g, h0)
assert h1.shape == (g.number_of_dst_nodes(), out_dim)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
def test_edge_conv_bi(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
@@ -412,9 +460,10 @@ def test_edge_conv_bi(g, idtype, out_dim):
h1 = edge_conv(g, (h0, x0))
assert h1.shape == (g.number_of_dst_nodes(), out_dim)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite']))
@pytest.mark.parametrize('aggregator_type', ['mean', 'max', 'sum'])
@pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"]))
@pytest.mark.parametrize("aggregator_type", ["mean", "max", "sum"])
def test_gin_conv(g, idtype, aggregator_type):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
@@ -428,9 +477,10 @@ def test_gin_conv(g, idtype, aggregator_type):
h = gin_conv(g, feat)
assert h.shape == (g.number_of_dst_nodes(), 5)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite']))
@pytest.mark.parametrize('aggregator_type', ['mean', 'max', 'sum'])
@pytest.mark.parametrize("g", get_cases(["bipartite"]))
@pytest.mark.parametrize("aggregator_type", ["mean", "max", "sum"])
def test_gin_conv_bi(g, idtype, aggregator_type):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
@@ -440,29 +490,35 @@ def test_gin_conv_bi(g, idtype, aggregator_type):
print(gin_conv)
# test #2: bipartite
feat = (F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)))
feat = (
F.randn((g.number_of_src_nodes(), 5)),
F.randn((g.number_of_dst_nodes(), 5)),
)
h = gin_conv(g, feat)
return h.shape == (g.number_of_dst_nodes(), 5)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
def test_gmm_conv(g, idtype):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
gmm_conv = nn.GMMConv(5, 2, 5, 3, 'max')
gmm_conv = nn.GMMConv(5, 2, 5, 3, "max")
gmm_conv.initialize(ctx=ctx)
h0 = F.randn((g.number_of_src_nodes(), 5))
pseudo = F.randn((g.number_of_edges(), 5))
h1 = gmm_conv(g, h0, pseudo)
assert h1.shape == (g.number_of_dst_nodes(), 2)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"]))
def test_gmm_conv_bi(g, idtype):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
gmm_conv = nn.GMMConv((5, 4), 2, 5, 3, 'max')
gmm_conv = nn.GMMConv((5, 4), 2, 5, 3, "max")
gmm_conv.initialize(ctx=ctx)
# test #1: basic
h0 = F.randn((g.number_of_src_nodes(), 5))
@@ -471,12 +527,13 @@ def test_gmm_conv_bi(g, idtype):
h1 = gmm_conv(g, (h0, hd), pseudo)
assert h1.shape == (g.number_of_dst_nodes(), 2)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite']))
@pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"]))
def test_nn_conv(g, idtype):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
nn_conv = nn.NNConv(5, 2, gluon.nn.Embedding(3, 5 * 2), 'max')
nn_conv = nn.NNConv(5, 2, gluon.nn.Embedding(3, 5 * 2), "max")
nn_conv.initialize(ctx=ctx)
# test #1: basic
h0 = F.randn((g.number_of_src_nodes(), 5))
@@ -484,12 +541,13 @@ def test_nn_conv(g, idtype):
h1 = nn_conv(g, h0, etypes)
assert h1.shape == (g.number_of_dst_nodes(), 2)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite']))
@pytest.mark.parametrize("g", get_cases(["bipartite"]))
def test_nn_conv_bi(g, idtype):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
nn_conv = nn.NNConv((5, 4), 2, gluon.nn.Embedding(3, 5 * 2), 'max')
nn_conv = nn.NNConv((5, 4), 2, gluon.nn.Embedding(3, 5 * 2), "max")
nn_conv.initialize(ctx=ctx)
# test #1: basic
h0 = F.randn((g.number_of_src_nodes(), 5))
@@ -498,7 +556,8 @@ def test_nn_conv_bi(g, idtype):
h1 = nn_conv(g, (h0, hd), etypes)
assert h1.shape == (g.number_of_dst_nodes(), 2)
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sg_conv(out_dim):
g = dgl.from_networkx(nx.erdos_renyi_graph(20, 0.3)).to(F.ctx())
g = dgl.add_self_loop(g)
@@ -513,11 +572,12 @@ def test_sg_conv(out_dim):
h1 = sgc(g, h0)
assert h1.shape == (g.number_of_nodes(), out_dim)
def test_set2set():
g = dgl.from_networkx(nx.path_graph(10)).to(F.ctx())
ctx = F.ctx()
s2s = nn.Set2Set(5, 3, 3) # hidden size 5, 3 iters, 3 layers
s2s = nn.Set2Set(5, 3, 3) # hidden size 5, 3 iters, 3 layers
s2s.initialize(ctx=ctx)
print(s2s)
@@ -532,6 +592,7 @@ def test_set2set():
h1 = s2s(bg, h0)
assert h1.shape[0] == 3 and h1.shape[1] == 10 and h1.ndim == 2
def test_glob_att_pool():
g = dgl.from_networkx(nx.path_graph(10)).to(F.ctx())
ctx = F.ctx()
@@ -550,13 +611,14 @@ def test_glob_att_pool():
h1 = gap(bg, h0)
assert h1.shape[0] == 4 and h1.shape[1] == 10 and h1.ndim == 2
def test_simple_pool():
g = dgl.from_networkx(nx.path_graph(15)).to(F.ctx())
sum_pool = nn.SumPooling()
avg_pool = nn.AvgPooling()
max_pool = nn.MaxPooling()
sort_pool = nn.SortPooling(10) # k = 10
sort_pool = nn.SortPooling(10) # k = 10
print(sum_pool, avg_pool, max_pool, sort_pool)
# test#1: basic
@@ -575,33 +637,43 @@ def test_simple_pool():
bg = dgl.batch([g, g_, g, g_, g])
h0 = F.randn((bg.number_of_nodes(), 5))
h1 = sum_pool(bg, h0)
truth = mx.nd.stack(F.sum(h0[:15], 0),
F.sum(h0[15:20], 0),
F.sum(h0[20:35], 0),
F.sum(h0[35:40], 0),
F.sum(h0[40:55], 0), axis=0)
truth = mx.nd.stack(
F.sum(h0[:15], 0),
F.sum(h0[15:20], 0),
F.sum(h0[20:35], 0),
F.sum(h0[35:40], 0),
F.sum(h0[40:55], 0),
axis=0,
)
check_close(h1, truth)
h1 = avg_pool(bg, h0)
truth = mx.nd.stack(F.mean(h0[:15], 0),
F.mean(h0[15:20], 0),
F.mean(h0[20:35], 0),
F.mean(h0[35:40], 0),
F.mean(h0[40:55], 0), axis=0)
truth = mx.nd.stack(
F.mean(h0[:15], 0),
F.mean(h0[15:20], 0),
F.mean(h0[20:35], 0),
F.mean(h0[35:40], 0),
F.mean(h0[40:55], 0),
axis=0,
)
check_close(h1, truth)
h1 = max_pool(bg, h0)
truth = mx.nd.stack(F.max(h0[:15], 0),
F.max(h0[15:20], 0),
F.max(h0[20:35], 0),
F.max(h0[35:40], 0),
F.max(h0[40:55], 0), axis=0)
truth = mx.nd.stack(
F.max(h0[:15], 0),
F.max(h0[15:20], 0),
F.max(h0[20:35], 0),
F.max(h0[35:40], 0),
F.max(h0[40:55], 0),
axis=0,
)
check_close(h1, truth)
h1 = sort_pool(bg, h0)
assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2
@pytest.mark.parametrize('O', [1, 2, 8])
@pytest.mark.parametrize("O", [1, 2, 8])
def test_rgcn(O):
ctx = F.ctx()
etype = []
@@ -654,6 +726,7 @@ def test_rgcn(O):
h_new = rgc_basis(g, h, r)
assert list(h_new.shape) == [100, O]
def test_sequential():
ctx = F.ctx()
# test single graph
@@ -663,11 +736,11 @@ def test_sequential():
def forward(self, graph, n_feat, e_feat):
graph = graph.local_var()
graph.ndata['h'] = n_feat
graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
n_feat += graph.ndata['h']
graph.apply_edges(fn.u_add_v('h', 'h', 'e'))
e_feat += graph.edata['e']
graph.ndata["h"] = n_feat
graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
n_feat += graph.ndata["h"]
graph.apply_edges(fn.u_add_v("h", "h", "e"))
e_feat += graph.edata["e"]
return n_feat, e_feat
g = dgl.graph(([], [])).to(F.ctx())
@@ -691,9 +764,9 @@ def test_sequential():
def forward(self, graph, n_feat):
graph = graph.local_var()
graph.ndata['h'] = n_feat
graph.update_all(fn.copy_u('h', 'm'), fn.sum('m', 'h'))
n_feat += graph.ndata['h']
graph.ndata["h"] = n_feat
graph.update_all(fn.copy_u("h", "m"), fn.sum("m", "h"))
n_feat += graph.ndata["h"]
return n_feat.reshape(graph.number_of_nodes() // 2, 2, -1).sum(1)
g1 = dgl.from_networkx(nx.erdos_renyi_graph(32, 0.05)).to(F.ctx())
@@ -709,58 +782,75 @@ def test_sequential():
n_feat = net([g1, g2, g3], n_feat)
assert n_feat.shape == (4, 4)
def myagg(alist, dsttype):
rst = alist[0]
for i in range(1, len(alist)):
rst = rst + (i + 1) * alist[i]
return rst
@parametrize_idtype
@pytest.mark.parametrize('agg', ['sum', 'max', 'min', 'mean', 'stack', myagg])
@pytest.mark.parametrize("agg", ["sum", "max", "min", "mean", "stack", myagg])
def test_hetero_conv(agg, idtype):
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 0, 2, 1], [1, 2, 1, 3]),
('user', 'plays', 'game'): ([0, 0, 0, 1, 2], [0, 2, 3, 0, 2]),
('store', 'sells', 'game'): ([0, 0, 1, 1], [0, 3, 1, 2])},
idtype=idtype, device=F.ctx())
conv = nn.HeteroGraphConv({
'follows': nn.GraphConv(2, 3, allow_zero_in_degree=True),
'plays': nn.GraphConv(2, 4, allow_zero_in_degree=True),
'sells': nn.GraphConv(3, 4, allow_zero_in_degree=True)},
agg)
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 0, 2, 1], [1, 2, 1, 3]),
("user", "plays", "game"): ([0, 0, 0, 1, 2], [0, 2, 3, 0, 2]),
("store", "sells", "game"): ([0, 0, 1, 1], [0, 3, 1, 2]),
},
idtype=idtype,
device=F.ctx(),
)
conv = nn.HeteroGraphConv(
{
"follows": nn.GraphConv(2, 3, allow_zero_in_degree=True),
"plays": nn.GraphConv(2, 4, allow_zero_in_degree=True),
"sells": nn.GraphConv(3, 4, allow_zero_in_degree=True),
},
agg,
)
conv.initialize(ctx=F.ctx())
print(conv)
uf = F.randn((4, 2))
gf = F.randn((4, 4))
sf = F.randn((2, 3))
h = conv(g, {'user': uf, 'store': sf, 'game': gf})
assert set(h.keys()) == {'user', 'game'}
if agg != 'stack':
assert h['user'].shape == (4, 3)
assert h['game'].shape == (4, 4)
h = conv(g, {"user": uf, "store": sf, "game": gf})
assert set(h.keys()) == {"user", "game"}
if agg != "stack":
assert h["user"].shape == (4, 3)
assert h["game"].shape == (4, 4)
else:
assert h['user'].shape == (4, 1, 3)
assert h['game'].shape == (4, 2, 4)
block = dgl.to_block(g.to(F.cpu()), {'user': [0, 1, 2, 3], 'game': [0, 1, 2, 3], 'store': []}).to(F.ctx())
h = conv(block, ({'user': uf, 'game': gf, 'store': sf}, {'user': uf, 'game': gf, 'store': sf[0:0]}))
assert set(h.keys()) == {'user', 'game'}
if agg != 'stack':
assert h['user'].shape == (4, 3)
assert h['game'].shape == (4, 4)
assert h["user"].shape == (4, 1, 3)
assert h["game"].shape == (4, 2, 4)
block = dgl.to_block(
g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []}
).to(F.ctx())
h = conv(
block,
(
{"user": uf, "game": gf, "store": sf},
{"user": uf, "game": gf, "store": sf[0:0]},
),
)
assert set(h.keys()) == {"user", "game"}
if agg != "stack":
assert h["user"].shape == (4, 3)
assert h["game"].shape == (4, 4)
else:
assert h['user'].shape == (4, 1, 3)
assert h['game'].shape == (4, 2, 4)
h = conv(block, {'user': uf, 'game': gf, 'store': sf})
assert set(h.keys()) == {'user', 'game'}
if agg != 'stack':
assert h['user'].shape == (4, 3)
assert h['game'].shape == (4, 4)
assert h["user"].shape == (4, 1, 3)
assert h["game"].shape == (4, 2, 4)
h = conv(block, {"user": uf, "game": gf, "store": sf})
assert set(h.keys()) == {"user", "game"}
if agg != "stack":
assert h["user"].shape == (4, 3)
assert h["game"].shape == (4, 4)
else:
assert h['user'].shape == (4, 1, 3)
assert h['game'].shape == (4, 2, 4)
assert h["user"].shape == (4, 1, 3)
assert h["game"].shape == (4, 2, 4)
# test with mod args
class MyMod(mx.gluon.nn.Block):
@@ -769,39 +859,46 @@ def test_hetero_conv(agg, idtype):
self.carg1 = 0
self.s1 = s1
self.s2 = s2
def forward(self, g, h, arg1=None): # mxnet does not support kwargs
if arg1 is not None:
self.carg1 += 1
return F.zeros((g.number_of_dst_nodes(), self.s2))
mod1 = MyMod(2, 3)
mod2 = MyMod(2, 4)
mod3 = MyMod(3, 4)
conv = nn.HeteroGraphConv({
'follows': mod1,
'plays': mod2,
'sells': mod3},
agg)
conv = nn.HeteroGraphConv(
{"follows": mod1, "plays": mod2, "sells": mod3}, agg
)
conv.initialize(ctx=F.ctx())
mod_args = {'follows' : (1,), 'plays' : (1,)}
h = conv(g, {'user' : uf, 'store' : sf, 'game': gf}, mod_args)
mod_args = {"follows": (1,), "plays": (1,)}
h = conv(g, {"user": uf, "store": sf, "game": gf}, mod_args)
assert mod1.carg1 == 1
assert mod2.carg1 == 1
assert mod3.carg1 == 0
#conv on graph without any edges
# conv on graph without any edges
for etype in g.etypes:
g = dgl.remove_edges(g, g.edges(form='eid', etype=etype), etype=etype)
g = dgl.remove_edges(g, g.edges(form="eid", etype=etype), etype=etype)
assert g.num_edges() == 0
h = conv(g, {'user': uf, 'game': gf, 'store': sf})
assert set(h.keys()) == {'user', 'game'}
block = dgl.to_block(g.to(F.cpu()), {'user': [0, 1, 2, 3], 'game': [
0, 1, 2, 3], 'store': []}).to(F.ctx())
h = conv(block, ({'user': uf, 'game': gf, 'store': sf},
{'user': uf, 'game': gf, 'store': sf[0:0]}))
assert set(h.keys()) == {'user', 'game'}
if __name__ == '__main__':
h = conv(g, {"user": uf, "game": gf, "store": sf})
assert set(h.keys()) == {"user", "game"}
block = dgl.to_block(
g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []}
).to(F.ctx())
h = conv(
block,
(
{"user": uf, "game": gf, "store": sf},
{"user": uf, "game": gf, "store": sf[0:0]},
),
)
assert set(h.keys()) == {"user", "game"}
if __name__ == "__main__":
test_graph_conv()
test_gat_conv()
test_sage_conv()
......
import operator
import numpy as np
import pytest
import dgl
import torch
import operator
import dgl
from dgl.mock_sparse import SparseMatrix, diag
parametrize_idtype = pytest.mark.parametrize(
......
import os
os.environ['OMP_NUM_THREADS'] = '1'
import dgl
os.environ["OMP_NUM_THREADS"] = "1"
import multiprocessing as mp
import pickle
import random
import socket
import sys
import numpy as np
import time
import socket
from scipy import sparse as spsp
import unittest
import backend as F
import numpy as np
import torch as th
from scipy import sparse as spsp
from dgl.distributed import DistGraphServer, DistGraph
from dgl.distributed import partition_graph, load_partition_book
import multiprocessing as mp
import dgl
from dgl import function as fn
import backend as F
import unittest
import pickle
import random
from dgl.distributed import DistEmbedding
from dgl.distributed import (
DistEmbedding,
DistGraph,
DistGraphServer,
load_partition_book,
partition_graph,
)
from dgl.distributed.optim import SparseAdagrad, SparseAdam
def create_random_graph(n):
arr = (spsp.random(n, n, density=0.001, format='coo', random_state=100) != 0).astype(np.int64)
arr = (
spsp.random(n, n, density=0.001, format="coo", random_state=100) != 0
).astype(np.int64)
return dgl.from_scipy(arr)
def get_local_usable_addr():
"""Get local usable IP and port
@@ -34,10 +44,10 @@ def get_local_usable_addr():
sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
try:
# doesn't even have to be reachable
sock.connect(('10.255.255.255', 1))
sock.connect(("10.255.255.255", 1))
ip_addr = sock.getsockname()[0]
except ValueError:
ip_addr = '127.0.0.1'
ip_addr = "127.0.0.1"
finally:
sock.close()
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -46,40 +56,62 @@ def get_local_usable_addr():
port = sock.getsockname()[1]
sock.close()
return ip_addr + ' ' + str(port)
return ip_addr + " " + str(port)
def prepare_dist():
ip_config = open("optim_ip_config.txt", "w")
ip_addr = get_local_usable_addr()
ip_config.write('{}\n'.format(ip_addr))
ip_config.write("{}\n".format(ip_addr))
ip_config.close()
def run_server(graph_name, server_id, server_count, num_clients, shared_mem):
g = DistGraphServer(server_id, "optim_ip_config.txt", num_clients, server_count,
'/tmp/dist_graph/{}.json'.format(graph_name),
disable_shared_mem=not shared_mem)
print('start server', server_id)
g = DistGraphServer(
server_id,
"optim_ip_config.txt",
num_clients,
server_count,
"/tmp/dist_graph/{}.json".format(graph_name),
disable_shared_mem=not shared_mem,
)
print("start server", server_id)
g.start()
def initializer(shape, dtype):
arr = th.zeros(shape, dtype=dtype)
th.manual_seed(0)
th.nn.init.uniform_(arr, 0, 1.0)
return arr
def run_client(graph_name, cli_id, part_id, server_count):
device=F.ctx()
device = F.ctx()
time.sleep(5)
os.environ['DGL_NUM_SERVER'] = str(server_count)
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("optim_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book('/tmp/dist_graph/{}.json'.format(graph_name),
part_id, None)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
)
g = DistGraph(graph_name, gpb=gpb)
policy = dgl.distributed.PartitionPolicy('node', g.get_partition_book())
policy = dgl.distributed.PartitionPolicy("node", g.get_partition_book())
num_nodes = g.number_of_nodes()
emb_dim = 4
dgl_emb = DistEmbedding(num_nodes, emb_dim, name='optim', init_func=initializer, part_policy=policy)
dgl_emb_zero = DistEmbedding(num_nodes, emb_dim, name='optim-zero', init_func=initializer, part_policy=policy)
dgl_emb = DistEmbedding(
num_nodes,
emb_dim,
name="optim",
init_func=initializer,
part_policy=policy,
)
dgl_emb_zero = DistEmbedding(
num_nodes,
emb_dim,
name="optim-zero",
init_func=initializer,
part_policy=policy,
)
dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01)
dgl_adam._world_size = 1
dgl_adam._rank = 0
@@ -91,11 +123,13 @@ def run_client(graph_name, cli_id, part_id, server_count):
th.manual_seed(0)
th.nn.init.uniform_(torch_emb_zero.weight, 0, 1.0)
torch_adam = th.optim.SparseAdam(
list(torch_emb.parameters()) + list(torch_emb_zero.parameters()), lr=0.01)
list(torch_emb.parameters()) + list(torch_emb_zero.parameters()),
lr=0.01,
)
labels = th.ones((4,)).long()
idx = th.randint(0, num_nodes, size=(4,))
dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
dgl_value = dgl_emb(idx, device).to(th.device("cpu"))
torch_value = torch_emb(idx)
torch_adam.zero_grad()
torch_loss = th.nn.functional.cross_entropy(torch_value, labels)
@@ -107,7 +141,10 @@ def run_client(graph_name, cli_id, part_id, server_count):
dgl_loss.backward()
dgl_adam.step()
assert F.allclose(dgl_emb.weight[0 : num_nodes//2], torch_emb.weight[0 : num_nodes//2])
assert F.allclose(
dgl_emb.weight[0 : num_nodes // 2], torch_emb.weight[0 : num_nodes // 2]
)
def check_sparse_adam(num_trainer=1, shared_mem=True):
prepare_dist()
@@ -116,23 +153,27 @@ def check_sparse_adam(num_trainer=1, shared_mem=True):
num_clients = num_trainer
num_parts = 1
graph_name = 'dist_graph_test'
partition_graph(g, graph_name, num_parts, '/tmp/dist_graph')
graph_name = "dist_graph_test"
partition_graph(g, graph_name, num_parts, "/tmp/dist_graph")
# let's just test on one partition for now.
# We cannot run multiple servers and clients on the same machine.
serv_ps = []
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for serv_id in range(num_servers):
p = ctx.Process(target=run_server, args=(graph_name, serv_id, num_servers,
num_clients, shared_mem))
p = ctx.Process(
target=run_server,
args=(graph_name, serv_id, num_servers, num_clients, shared_mem),
)
serv_ps.append(p)
p.start()
cli_ps = []
for cli_id in range(num_clients):
print('start client', cli_id)
p = ctx.Process(target=run_client, args=(graph_name, cli_id, 0, num_servers))
print("start client", cli_id)
p = ctx.Process(
target=run_client, args=(graph_name, cli_id, 0, num_servers)
)
p.start()
cli_ps.append(p)
@@ -142,12 +183,14 @@ def check_sparse_adam(num_trainer=1, shared_mem=True):
for p in serv_ps:
p.join()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_sparse_opt():
os.environ['DGL_DIST_MODE'] = 'distributed'
os.environ["DGL_DIST_MODE"] = "distributed"
check_sparse_adam(1, True)
check_sparse_adam(1, False)
if __name__ == '__main__':
os.makedirs('/tmp/dist_graph', exist_ok=True)
test_sparse_opt()
\ No newline at end of file
if __name__ == "__main__":
os.makedirs("/tmp/dist_graph", exist_ok=True)
test_sparse_opt()
import backend as F
import dgl.nn
import dgl
import numpy as np
import pytest
import torch as th
from dgl import DGLError
from dgl.base import DGLWarning
from dgl.geometry import neighbor_matching, farthest_point_sampler
from test_utils import parametrize_idtype
from test_utils.graph_cases import get_cases
import dgl
import dgl.nn
from dgl import DGLError
from dgl.base import DGLWarning
from dgl.geometry import farthest_point_sampler, neighbor_matching
def test_fps():
N = 1000
batch_size = 5
sample_points = 10
x = th.tensor(np.random.uniform(size=(batch_size, int(N/batch_size), 3)))
x = th.tensor(np.random.uniform(size=(batch_size, int(N / batch_size), 3)))
ctx = F.ctx()
if F.gpu_ctx():
x = x.to(ctx)
@@ -29,17 +30,18 @@ def test_fps_start_idx():
N = 1000
batch_size = 5
sample_points = 10
x = th.tensor(np.random.uniform(size=(batch_size, int(N/batch_size), 3)))
x = th.tensor(np.random.uniform(size=(batch_size, int(N / batch_size), 3)))
ctx = F.ctx()
if F.gpu_ctx():
x = x.to(ctx)
res = farthest_point_sampler(x, sample_points, start_idx=0)
assert th.any(res[:, 0] == 0)
def _test_knn_common(device, algorithm, dist, exclude_self):
x = th.randn(8, 3).to(device)
kg = dgl.nn.KNNGraph(3)
if dist == 'euclidean':
if dist == "euclidean":
d = th.cdist(x, x).to(F.cpu())
else:
x = x + th.randn(1).item()
@@ -55,7 +57,14 @@ def _test_knn_common(device, algorithm, dist, exclude_self):
assert len(src) == k
if check_indices:
i = v - start
src_ans = set(th.topk(d[start:end, start:end][i], k + (1 if exclude_self else 0), largest=False)[1].numpy() + start)
src_ans = set(
th.topk(
d[start:end, start:end][i],
k + (1 if exclude_self else 0),
largest=False,
)[1].numpy()
+ start
)
if exclude_self:
# remove self
src_ans.remove(v)
@@ -63,7 +72,9 @@ def _test_knn_common(device, algorithm, dist, exclude_self):
def check_batch(g, k, expected_batch_info):
assert F.array_equal(g.batch_num_nodes(), F.tensor(expected_batch_info))
assert F.array_equal(g.batch_num_edges(), k*F.tensor(expected_batch_info))
assert F.array_equal(
g.batch_num_edges(), k * F.tensor(expected_batch_info)
)
# check knn with 2d input
g = kg(x, algorithm, dist, exclude_self)
@@ -145,23 +156,27 @@ def _test_knn_common(device, algorithm, dist, exclude_self):
kg = dgl.nn.SegmentedKNNGraph(3)
g = kg(x, [4, 7, 5, 4], algorithm, dist, exclude_self)
# different algorithms may break the tie differently, so don't check the indices
check_knn(g, x, 0, 4, 3, exclude_self, False)
check_knn(g, x, 4, 11, 3, exclude_self, False)
check_knn(g, x, 0, 4, 3, exclude_self, False)
check_knn(g, x, 4, 11, 3, exclude_self, False)
check_knn(g, x, 11, 16, 3, exclude_self, False)
check_knn(g, x, 16, 20, 3, exclude_self, False)
check_batch(g, 3, [4, 7, 5, 4])
@pytest.mark.parametrize('algorithm', ['bruteforce-blas', 'bruteforce', 'kd-tree'])
@pytest.mark.parametrize('dist', ['euclidean', 'cosine'])
@pytest.mark.parametrize('exclude_self', [False, True])
@pytest.mark.parametrize(
"algorithm", ["bruteforce-blas", "bruteforce", "kd-tree"]
)
@pytest.mark.parametrize("dist", ["euclidean", "cosine"])
@pytest.mark.parametrize("exclude_self", [False, True])
def test_knn_cpu(algorithm, dist, exclude_self):
_test_knn_common(F.cpu(), algorithm, dist, exclude_self)
@pytest.mark.parametrize('algorithm', ['bruteforce-blas', 'bruteforce', 'bruteforce-sharemem'])
@pytest.mark.parametrize('dist', ['euclidean', 'cosine'])
@pytest.mark.parametrize('exclude_self', [False, True])
@pytest.mark.parametrize(
"algorithm", ["bruteforce-blas", "bruteforce", "bruteforce-sharemem"]
)
@pytest.mark.parametrize("dist", ["euclidean", "cosine"])
@pytest.mark.parametrize("exclude_self", [False, True])
def test_knn_cuda(algorithm, dist, exclude_self):
if not th.cuda.is_available():
return
@@ -169,9 +184,9 @@ def test_knn_cuda(algorithm, dist, exclude_self):
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo'], exclude=['dglgraph']))
@pytest.mark.parametrize('weight', [True, False])
@pytest.mark.parametrize('relabel', [True, False])
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["dglgraph"]))
@pytest.mark.parametrize("weight", [True, False])
@pytest.mark.parametrize("relabel", [True, False])
def test_edge_coarsening(idtype, g, weight, relabel):
num_nodes = g.num_nodes()
g = dgl.to_bidirected(g)
@@ -205,7 +220,7 @@ def test_edge_coarsening(idtype, g, weight, relabel):
assert g.has_edges_between(u, v)
if __name__ == '__main__':
if __name__ == "__main__":
test_fps()
test_fps_start_idx()
test_knn()
import dgl
import torch as th
import torch.multiprocessing as mp
import os
import unittest
import torch as th
import torch.multiprocessing as mp
import dgl
def sub_ipc(g):
print(g)
return g
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_torch_ipc():
g = dgl.graph(([0, 1, 2], [1, 2, 3]))
ctx = mp.get_context("spawn")
p = ctx.Process(target=sub_ipc, args=(g, ))
p = ctx.Process(target=sub_ipc, args=(g,))
p.start()
p.join()
if __name__ == "__main__":
test_torch_ipc()
\ No newline at end of file
test_torch_ipc()
import os
import time
import torch.multiprocessing as mp
import unittest, os
import pytest
import unittest
import torch as th
import backend as F
import pytest
import torch as th
import torch.multiprocessing as mp
from dgl.nn import NodeEmbedding
from dgl.optim import SparseAdam, SparseAdagrad
from dgl.optim import SparseAdagrad, SparseAdam
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize('emb_dim', [1, 4, 101, 1024])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("emb_dim", [1, 4, 101, 1024])
def test_sparse_adam(emb_dim):
num_embs = 10
device=F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, 'test')
device = F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, "test")
torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True)
th.manual_seed(0)
th.nn.init.uniform_(torch_emb.weight, 0, 1.0)
@@ -26,7 +28,7 @@ def test_sparse_adam(emb_dim):
# first step
idx = th.randint(0, num_embs, size=(4,))
dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
dgl_value = dgl_emb(idx, device).to(th.device("cpu"))
torch_value = torch_emb(idx)
labels = th.zeros((4,)).long()
print("dgl_value = {}".format(dgl_value))
@@ -47,17 +49,18 @@ def test_sparse_adam(emb_dim):
# Pytorch sparseAdam maintains a global step
# DGL sparseAdam use a per embedding step
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize('use_uva', [False, True, None])
@pytest.mark.parametrize('emb_dim', [1, 4, 101, 1024])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("use_uva", [False, True, None])
@pytest.mark.parametrize("emb_dim", [1, 4, 101, 1024])
def test_sparse_adam_uva(use_uva, emb_dim):
if F.ctx().type == 'cpu' and use_uva == True:
if F.ctx().type == "cpu" and use_uva == True:
# we want to only test values of False and None when not using GPU
pytest.skip("UVA cannot be used without GPUs.")
num_embs = 10
device=F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, 'test_uva{}'.format(use_uva))
device = F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, "test_uva{}".format(use_uva))
torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True)
th.manual_seed(0)
th.nn.init.uniform_(torch_emb.weight, 0, 1.0)
@@ -69,7 +72,7 @@ def test_sparse_adam_uva(use_uva, emb_dim):
# first step
idx = th.randint(0, num_embs, size=(4,))
dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
dgl_value = dgl_emb(idx, device).to(th.device("cpu"))
torch_value = torch_emb(idx)
labels = th.zeros((4,)).long()
@@ -88,13 +91,14 @@ def test_sparse_adam_uva(use_uva, emb_dim):
# Pytorch sparseAdam maintains a global step
# DGL sparseAdam use a per embedding step
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@pytest.mark.parametrize('dtype', [th.float32, th.float16])
@pytest.mark.parametrize('emb_dim', [1, 4, 101, 1024])
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("dtype", [th.float32, th.float16])
@pytest.mark.parametrize("emb_dim", [1, 4, 101, 1024])
def test_sparse_adam_dtype(dtype, emb_dim):
num_embs = 10
device=F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, 'test_dtype{}'.format(dtype))
device = F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, "test_dtype{}".format(dtype))
torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True)
th.manual_seed(0)
th.nn.init.uniform_(torch_emb.weight, 0, 1.0)
@@ -106,7 +110,7 @@ def test_sparse_adam_dtype(dtype, emb_dim):
# first step
idx = th.randint(0, num_embs, size=(4,))
dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
dgl_value = dgl_emb(idx, device).to(th.device("cpu"))
torch_value = torch_emb(idx)
labels = th.zeros((4,)).long()
@@ -126,15 +130,14 @@ def test_sparse_adam_dtype(dtype, emb_dim):
# DGL sparseAdam use a per embedding step
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
def test_sparse_adam_zero_step():
num_embs = 10
emb_dim = 4
device=F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, 'test')
device = F.ctx()
dgl_emb = NodeEmbedding(num_embs, emb_dim, "test")
torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True)
dgl_emb_zero = NodeEmbedding(num_embs, emb_dim, 'test2')
dgl_emb_zero = NodeEmbedding(num_embs, emb_dim, "test2")
torch_emb_zero = th.nn.Embedding(num_embs, emb_dim, sparse=True)
th.manual_seed(0)
th.nn.init.uniform_(torch_emb.weight, 0, 1.0)
@@ -145,11 +148,13 @@ def test_sparse_adam_zero_step():
dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01)
torch_adam = th.optim.SparseAdam(
list(torch_emb.parameters()) + list(torch_emb_zero.parameters()), lr=0.01)
list(torch_emb.parameters()) + list(torch_emb_zero.parameters()),
lr=0.01,
)
# first step
idx = th.randint(0, num_embs, size=(4,))
dgl_value = dgl_emb(idx, device).to(th.device('cpu'))
dgl_value = dgl_emb(idx, device).to(th.device("cpu"))
torch_value = torch_emb(idx)
labels = th.ones((4,)).long()
@@ -164,33 +169,51 @@ def test_sparse_adam_zero_step():
torch_adam.step()
assert F.allclose(dgl_emb.weight, torch_emb.weight)
def initializer(emb):
th.manual_seed(0)
emb.uniform_(-1.0, 1.0)
return emb
def start_sparse_adam_worker(rank, device, world_size, weight, tensor_dev='cpu', has_zero_grad=False,
backend='gloo', num_embs=128, emb_dim=10):
print('start sparse worker for adam {}'.format(rank))
dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
master_ip='127.0.0.1', master_port='12345')
if device.type == 'cuda':
def start_sparse_adam_worker(
rank,
device,
world_size,
weight,
tensor_dev="cpu",
has_zero_grad=False,
backend="gloo",
num_embs=128,
emb_dim=10,
):
print("start sparse worker for adam {}".format(rank))
dist_init_method = "tcp://{master_ip}:{master_port}".format(
master_ip="127.0.0.1", master_port="12345"
)
if device.type == "cuda":
th.cuda.set_device(device)
th.distributed.init_process_group(backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=rank)
th.distributed.init_process_group(
backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=rank,
)
init_weight = th.empty((num_embs, emb_dim))
th.manual_seed(0)
th.nn.init.uniform_(init_weight, -1.0, 1.0)
dgl_emb = NodeEmbedding(num_embs, emb_dim, 'test', init_func=initializer, device=tensor_dev)
dgl_emb = NodeEmbedding(
num_embs, emb_dim, "test", init_func=initializer, device=tensor_dev
)
dgl_emb.all_set_embedding(init_weight)
if has_zero_grad:
dgl_emb_zero = NodeEmbedding(num_embs, emb_dim, 'zero', init_func=initializer, device=tensor_dev)
dgl_emb_zero = NodeEmbedding(
num_embs, emb_dim, "zero", init_func=initializer, device=tensor_dev
)
dgl_adam = SparseAdam(params=[dgl_emb, dgl_emb_zero], lr=0.01)
else:
dgl_adam = SparseAdam(params=[dgl_emb], lr=0.01)
@@ -215,17 +238,22 @@ def start_sparse_adam_worker(rank, device, world_size, weight, tensor_dev='cpu',
weight[:] = dgl_weight[:]
th.distributed.barrier()
def start_torch_adam_worker(rank, world_size, weight, has_zero_grad=False,
num_embs=128, emb_dim=10):
print('start sparse worker for adam {}'.format(rank))
dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
master_ip='127.0.0.1', master_port='12345')
backend='gloo'
th.distributed.init_process_group(backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=rank)
def start_torch_adam_worker(
rank, world_size, weight, has_zero_grad=False, num_embs=128, emb_dim=10
):
print("start sparse worker for adam {}".format(rank))
dist_init_method = "tcp://{master_ip}:{master_port}".format(
master_ip="127.0.0.1", master_port="12345"
)
backend = "gloo"
th.distributed.init_process_group(
backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=rank,
)
torch_emb = th.nn.Embedding(num_embs, emb_dim, sparse=True)
th.manual_seed(0)
@@ -238,10 +266,14 @@ def start_torch_adam_worker(rank, world_size, weight, has_zero_grad=False,
th.nn.init.uniform_(torch_emb_zero.weight, -1.0, 1.0)
torch_emb_zero = th.nn.parallel.DistributedDataParallel(torch_emb_zero)
torch_adam = th.optim.SparseAdam(
list(torch_emb.module.parameters()) + list(torch_emb_zero.module.parameters()),
lr=0.01)
list(torch_emb.module.parameters())
+ list(torch_emb_zero.module.parameters()),
lr=0.01,
)
else:
torch_adam = th.optim.SparseAdam(list(torch_emb.module.parameters()), lr=0.01)
torch_adam = th.optim.SparseAdam(
list(torch_emb.module.parameters()), lr=0.01
)
start = (num_embs // world_size) * rank
end = (num_embs // world_size) * (rank + 1)
@@ -259,20 +291,31 @@ def start_torch_adam_worker(rank, world_size, weight, has_zero_grad=False,
weight[:] = torch_emb.module.weight.cpu()[:]
th.distributed.barrier()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(F.ctx().type != 'cpu', reason='cpu only test')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(F.ctx().type != "cpu", reason="cpu only test")
@pytest.mark.parametrize("num_workers", [2, 4])
def test_multiprocess_cpu_sparse_adam(num_workers):
backend = 'gloo'
backend = "gloo"
worker_list = []
num_embs=128
emb_dim=10
num_embs = 128
emb_dim = 10
dgl_weight = th.empty((num_embs, emb_dim))
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
device = F.ctx()
p = ctx.Process(target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, th.device('cpu'), True, backend))
p = ctx.Process(
target=start_sparse_adam_worker,
args=(
i,
device,
num_workers,
dgl_weight,
th.device("cpu"),
True,
backend,
),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -281,8 +324,10 @@ def test_multiprocess_cpu_sparse_adam(num_workers):
worker_list = []
torch_weight = th.empty((num_embs, emb_dim))
for i in range(num_workers):
p = ctx.Process(target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False))
p = ctx.Process(
target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -290,26 +335,37 @@ def test_multiprocess_cpu_sparse_adam(num_workers):
assert F.allclose(dgl_weight, torch_weight)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(F.ctx().type == 'cpu', reason='gpu only test')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(F.ctx().type == "cpu", reason="gpu only test")
@pytest.mark.parametrize("num_workers", [2, 4, 8])
@pytest.mark.parametrize("backend", ['nccl', 'gloo'])
@pytest.mark.parametrize("backend", ["nccl", "gloo"])
def test_multiprocess_sparse_adam(num_workers, backend):
if F.ctx().type == 'cuda' and th.cuda.device_count() < num_workers:
if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers:
pytest.skip("Not enough GPUs to run test.")
worker_list = []
num_embs=128
emb_dim=10
num_embs = 128
emb_dim = 10
dgl_weight = th.empty((num_embs, emb_dim))
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
device = F.ctx()
if device.type == 'cuda':
if device.type == "cuda":
# make sure each process has a unique GPU
device = th.device(i)
p = ctx.Process(target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, th.device('cpu'), True, backend))
p = ctx.Process(
target=start_sparse_adam_worker,
args=(
i,
device,
num_workers,
dgl_weight,
th.device("cpu"),
True,
backend,
),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -318,8 +374,10 @@ def test_multiprocess_sparse_adam(num_workers, backend):
worker_list = []
torch_weight = th.empty((num_embs, emb_dim))
for i in range(num_workers):
p = ctx.Process(target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False))
p = ctx.Process(
target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -327,25 +385,30 @@ def test_multiprocess_sparse_adam(num_workers, backend):
assert F.allclose(dgl_weight, torch_weight)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(F.ctx().type == 'cpu', reason='cuda tensor is not supported for cpu')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(
F.ctx().type == "cpu", reason="cuda tensor is not supported for cpu"
)
@pytest.mark.parametrize("num_workers", [2, 4, 8])
def test_multiprocess_sparse_adam_cuda_tensor(num_workers):
if F.ctx().type == 'cpu':
if F.ctx().type == "cpu":
pytest.skip("Do not test CPU")
if F.ctx().type == 'cuda' and th.cuda.device_count() < num_workers:
if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers:
pytest.skip("Not enough GPUs to run test.")
backend = 'nccl'
backend = "nccl"
worker_list = []
num_embs=128
emb_dim=10
num_embs = 128
emb_dim = 10
dgl_weight = th.empty((num_embs, emb_dim))
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
device = th.device(i)
p = ctx.Process(target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, device, False, backend))
p = ctx.Process(
target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, device, False, backend),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -354,8 +417,10 @@ def test_multiprocess_sparse_adam_cuda_tensor(num_workers):
worker_list = []
torch_weight = th.empty((num_embs, emb_dim))
for i in range(num_workers):
p = ctx.Process(target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False))
p = ctx.Process(
target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -363,21 +428,32 @@ def test_multiprocess_sparse_adam_cuda_tensor(num_workers):
assert F.allclose(dgl_weight, torch_weight)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(F.ctx().type != 'cpu', reason='cpu only test')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(F.ctx().type != "cpu", reason="cpu only test")
@pytest.mark.parametrize("num_workers", [2, 4])
def test_multiprocess_sparse_adam_cpu_zero_step(num_workers):
backend = 'gloo'
backend = "gloo"
worker_list = []
num_embs=128
emb_dim=10
num_embs = 128
emb_dim = 10
dgl_weight = th.empty((num_embs, emb_dim))
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
device = F.ctx()
p = ctx.Process(target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, th.device('cpu'), True, backend))
p = ctx.Process(
target=start_sparse_adam_worker,
args=(
i,
device,
num_workers,
dgl_weight,
th.device("cpu"),
True,
backend,
),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -386,8 +462,10 @@ def test_multiprocess_sparse_adam_cpu_zero_step(num_workers):
worker_list = []
torch_weight = th.empty((num_embs, emb_dim))
for i in range(num_workers):
p = ctx.Process(target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False))
p = ctx.Process(
target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -395,26 +473,37 @@ def test_multiprocess_sparse_adam_cpu_zero_step(num_workers):
assert F.allclose(dgl_weight, torch_weight)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(F.ctx().type == 'cpu', reason='gpu only test')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(F.ctx().type == "cpu", reason="gpu only test")
@pytest.mark.parametrize("num_workers", [2, 4, 8])
@pytest.mark.parametrize("backend", ['nccl', 'gloo'])
@pytest.mark.parametrize("backend", ["nccl", "gloo"])
def test_multiprocess_sparse_adam_zero_step(num_workers, backend):
if F.ctx().type == 'cuda' and th.cuda.device_count() < num_workers:
if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers:
pytest.skip("Not enough GPUs to run test.")
worker_list = []
num_embs=128
emb_dim=10
num_embs = 128
emb_dim = 10
dgl_weight = th.empty((num_embs, emb_dim))
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
device = F.ctx()
if device.type == 'cuda':
if device.type == "cuda":
# make sure each process has a unique GPU
device = th.device(i)
p = ctx.Process(target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, th.device('cpu'), True, backend))
p = ctx.Process(
target=start_sparse_adam_worker,
args=(
i,
device,
num_workers,
dgl_weight,
th.device("cpu"),
True,
backend,
),
)
p.start()
worker_list.append(p)
for p in worker_list:
@@ -423,8 +512,10 @@ def test_multiprocess_sparse_adam_zero_step(num_workers, backend):
worker_list = []
torch_weight = th.empty((num_embs, emb_dim))
for i in range(num_workers):
p = ctx.Process(target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False))
p = ctx.Process(
target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False),
)
p.start()
worker_list.append(p)
for p in worker_list:
......@@ -432,23 +523,28 @@ def test_multiprocess_sparse_adam_zero_step(num_workers, backend):
assert F.allclose(dgl_weight, torch_weight)
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(F.ctx().type == 'cpu', reason='cuda tensor is not supported for cpu')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@unittest.skipIf(
F.ctx().type == "cpu", reason="cuda tensor is not supported for cpu"
)
@pytest.mark.parametrize("num_workers", [2, 4, 8])
def test_multiprocess_sparse_adam_zero_step_cuda_tensor(num_workers):
if F.ctx().type == 'cuda' and th.cuda.device_count() < num_workers:
if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers:
pytest.skip("Not enough GPUs to run test.")
backend = 'nccl'
backend = "nccl"
worker_list = []
num_embs=128
emb_dim=10
num_embs = 128
emb_dim = 10
dgl_weight = th.empty((num_embs, emb_dim))
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
device = th.device(i)
p = ctx.Process(target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, device, True, backend))
p = ctx.Process(
target=start_sparse_adam_worker,
args=(i, device, num_workers, dgl_weight, device, True, backend),
)
p.start()
worker_list.append(p)
for p in worker_list:
......@@ -457,8 +553,10 @@ def test_multiprocess_sparse_adam_zero_step_cuda_tensor(num_workers):
worker_list = []
torch_weight = th.empty((num_embs, emb_dim))
for i in range(num_workers):
p = ctx.Process(target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False))
p = ctx.Process(
target=start_torch_adam_worker,
args=(i, num_workers, torch_weight, False),
)
p.start()
worker_list.append(p)
for p in worker_list:
......@@ -466,7 +564,8 @@ def test_multiprocess_sparse_adam_zero_step_cuda_tensor(num_workers):
assert F.allclose(dgl_weight, torch_weight)
if __name__ == '__main__':
if __name__ == "__main__":
test_sparse_adam(1)
test_sparse_adam(4)
test_sparse_adam(101)
......@@ -478,15 +577,15 @@ if __name__ == '__main__':
test_multiprocess_cpu_sparse_adam(8)
test_multiprocess_sparse_adam_cpu_zero_step(2)
test_multiprocess_sparse_adam(2, backend='gloo')
test_multiprocess_sparse_adam(4, backend='gloo')
test_multiprocess_sparse_adam(8, backend='gloo')
test_multiprocess_sparse_adam(2, backend='nccl')
test_multiprocess_sparse_adam(4, backend='nccl')
test_multiprocess_sparse_adam(8, backend='nccl')
test_multiprocess_sparse_adam(2, backend="gloo")
test_multiprocess_sparse_adam(4, backend="gloo")
test_multiprocess_sparse_adam(8, backend="gloo")
test_multiprocess_sparse_adam(2, backend="nccl")
test_multiprocess_sparse_adam(4, backend="nccl")
test_multiprocess_sparse_adam(8, backend="nccl")
test_multiprocess_sparse_adam_zero_step(2, backend='gloo')
test_multiprocess_sparse_adam_zero_step(4, backend='nccl')
test_multiprocess_sparse_adam_zero_step(2, backend="gloo")
test_multiprocess_sparse_adam_zero_step(4, backend="nccl")
test_multiprocess_sparse_adam_cuda_tensor(2)
test_multiprocess_sparse_adam_zero_step_cuda_tensor(4)
import io
import pickle
import networkx as nx
import dgl
import torch
import pickle
import io
import dgl
def _reconstruct_pickle(obj):
f = io.BytesIO()
......@@ -12,15 +15,17 @@ def _reconstruct_pickle(obj):
f.close()
return obj
def test_pickling_batched_graph():
# NOTE: this is a test for a weird bug mentioned in
# https://github.com/dmlc/dgl/issues/438
glist = [nx.path_graph(i + 5) for i in range(5)]
glist = [dgl.DGLGraph(g) for g in glist]
bg = dgl.batch(glist)
bg.ndata['x'] = torch.randn((35, 5))
bg.edata['y'] = torch.randn((60, 3))
bg.ndata["x"] = torch.randn((35, 5))
bg.edata["y"] = torch.randn((60, 3))
new_bg = _reconstruct_pickle(bg)
if __name__ == '__main__':
if __name__ == "__main__":
test_pickling_batched_graph()
import backend as F
import dgl
import pytest
import torch
@pytest.mark.skipif(F._default_context_str == 'cpu', reason="Need gpu for this test.")
import dgl
@pytest.mark.skipif(
F._default_context_str == "cpu", reason="Need gpu for this test."
)
def test_pin_noncontiguous():
t = torch.empty([10, 100]).transpose(0, 1)
......@@ -13,7 +17,10 @@ def test_pin_noncontiguous():
with pytest.raises(dgl.DGLError):
dgl.utils.pin_memory_inplace(t)
@pytest.mark.skipif(F._default_context_str == 'cpu', reason="Need gpu for this test.")
@pytest.mark.skipif(
F._default_context_str == "cpu", reason="Need gpu for this test."
)
def test_pin_view():
t = torch.empty([100, 10])
v = t[10:20]
......@@ -24,7 +31,10 @@ def test_pin_view():
with pytest.raises(dgl.DGLError):
dgl.utils.pin_memory_inplace(v)
@pytest.mark.skipif(F._default_context_str == 'cpu', reason='Need gpu for this test.')
@pytest.mark.skipif(
F._default_context_str == "cpu", reason="Need gpu for this test."
)
def test_unpin_automatically():
# run a sufficient number of iterations such that the memory pool should be
# re-used
......@@ -38,26 +48,32 @@ def test_unpin_automatically():
assert not F.is_pinned(t)
del t
@pytest.mark.skipif(F._default_context_str == 'cpu', reason='Need gpu for this test.')
@pytest.mark.skipif(
F._default_context_str == "cpu", reason="Need gpu for this test."
)
def test_pin_unpin_column():
g = dgl.graph(([1, 2, 3, 4], [0, 0, 0, 0]))
g.ndata['x'] = torch.randn(g.num_nodes())
g.ndata["x"] = torch.randn(g.num_nodes())
g.pin_memory_()
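# Per the asserts below, pin_memory_() pins both the graph structure and its node features in place.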
assert g.is_pinned()
assert g.ndata['x'].is_pinned()
assert g.ndata["x"].is_pinned()
for col in g._node_frames[0].values():
assert col.pinned_by_dgl
assert col._data_nd is not None
g.ndata['x'] = torch.randn(g.num_nodes()) # unpin the old ndata['x']
g.ndata["x"] = torch.randn(g.num_nodes()) # unpin the old ndata['x']
assert g.is_pinned()
for col in g._node_frames[0].values():
assert not col.pinned_by_dgl
assert col._data_nd is None
assert not g.ndata['x'].is_pinned()
assert not g.ndata["x"].is_pinned()
@pytest.mark.skipif(F._default_context_str == 'cpu', reason='Need gpu for this test.')
@pytest.mark.skipif(
F._default_context_str == "cpu", reason="Need gpu for this test."
)
def test_pin_empty():
t = torch.tensor([])
assert not t.is_pinned()
......@@ -68,6 +84,7 @@ def test_pin_empty():
nd = dgl.utils.pin_memory_inplace(t)
assert not t.is_pinned()
if __name__ == "__main__":
test_pin_noncontiguous()
test_pin_view()
......
import multiprocessing as mp
import unittest, os
import pytest
import os
import unittest
import torch as th
import backend as F
import pytest
import torch as th
from dgl.nn import NodeEmbedding
......@@ -13,46 +14,54 @@ def initializer(emb):
emb.uniform_(-1.0, 1.0)
return emb
def check_all_set_all_get_func(device, init_emb):
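# Round-trip check: push init_emb into the shared NodeEmbedding, read it back,
# and verify the values come back unchanged on the given device.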
num_embs = init_emb.shape[0]
emb_dim = init_emb.shape[1]
dgl_emb = NodeEmbedding(num_embs, emb_dim, 'test', device=device)
dgl_emb = NodeEmbedding(num_embs, emb_dim, "test", device=device)
dgl_emb.all_set_embedding(init_emb)
out_emb = dgl_emb.all_get_embedding()
assert F.allclose(init_emb, out_emb)
def start_sparse_worker(rank, world_size, test, args):
print('start sparse worker {}'.format(rank))
dist_init_method = 'tcp://{master_ip}:{master_port}'.format(
master_ip='127.0.0.1', master_port='12345')
backend = 'gloo'
print("start sparse worker {}".format(rank))
dist_init_method = "tcp://{master_ip}:{master_port}".format(
master_ip="127.0.0.1", master_port="12345"
)
backend = "gloo"
device = F.ctx()
if device.type == 'cuda':
if device.type == "cuda":
device = th.device(rank)
th.cuda.set_device(device)
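# Each rank gets its own CUDA device so spawned workers do not contend for GPU 0.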
th.distributed.init_process_group(backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=rank)
th.distributed.init_process_group(
backend=backend,
init_method=dist_init_method,
world_size=world_size,
rank=rank,
)
test(device, *args)
th.distributed.barrier()
@unittest.skipIf(os.name == 'nt', reason='Do not support windows yet')
@unittest.skipIf(os.name == "nt", reason="Do not support windows yet")
@pytest.mark.parametrize("num_workers", [1, 2, 3])
def test_multiprocess_sparse_emb_get_set(num_workers):
if F.ctx().type == 'cuda' and th.cuda.device_count() < num_workers:
if F.ctx().type == "cuda" and th.cuda.device_count() < num_workers:
pytest.skip("Not enough GPUs to run test.")
worker_list = []
init_emb = th.rand([1000, 8])
ctx = mp.get_context('spawn')
ctx = mp.get_context("spawn")
for i in range(num_workers):
p = ctx.Process(target=start_sparse_worker,
args=(i, num_workers, check_all_set_all_get_func, (init_emb,)))
p = ctx.Process(
target=start_sparse_worker,
args=(i, num_workers, check_all_set_all_get_func, (init_emb,)),
)
p.start()
worker_list.append(p)
......@@ -62,7 +71,7 @@ def test_multiprocess_sparse_emb_get_set(num_workers):
assert p.exitcode == 0
if __name__ == '__main__':
if __name__ == "__main__":
test_sparse_emb_get_set(1)
test_sparse_emb_get_set(2)
test_sparse_emb_get_set(3)
from statistics import mean
import unittest
from statistics import mean
import backend as F
import numpy as np
import torch
import dgl
import dgl.ndarray as nd
from dgl import rand_graph
import dgl.ops as OPS
from dgl._ffi.streams import to_dgl_stream_handle, _dgl_get_stream
from dgl import rand_graph
from dgl._ffi.streams import _dgl_get_stream, to_dgl_stream_handle
from dgl.utils import to_dgl_context
import backend as F
# borrowed from PyTorch, torch/testing/_internal/common_utils.py
def _get_cycles_per_ms() -> float:
"""Measure and return approximate number of cycles per millisecond for torch.cuda._sleep
"""
"""Measure and return approximate number of cycles per millisecond for torch.cuda._sleep"""
def measure() -> float:
start = torch.cuda.Event(enable_timing=True)
......@@ -36,7 +38,10 @@ def _get_cycles_per_ms() -> float:
vals = sorted(vals)
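# Trimmed mean: discard the extreme timings at both ends to reduce measurement jitter.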
return mean(vals[2 : num - 2])
@unittest.skipIf(F._default_context_str == 'cpu', reason="stream only runs on GPU.")
@unittest.skipIf(
F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_basics():
g = rand_graph(10, 20, device=F.cpu())
x = torch.ones(g.num_nodes(), 10)
......@@ -57,22 +62,31 @@ def test_basics():
s.synchronize()
assert torch.equal(OPS.copy_u_sum(gg, xx), result)
@unittest.skipIf(F._default_context_str == 'cpu', reason="stream only runs on GPU.")
@unittest.skipIf(
F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_set_get_stream():
current_stream = torch.cuda.current_stream()
# test setting another stream
s = torch.cuda.Stream(device=F.ctx())
torch.cuda.set_stream(s)
assert to_dgl_stream_handle(s).value == _dgl_get_stream(to_dgl_context(F.ctx())).value
assert (
to_dgl_stream_handle(s).value
== _dgl_get_stream(to_dgl_context(F.ctx())).value
)
# revert to default stream
torch.cuda.set_stream(current_stream)
@unittest.skipIf(F._default_context_str == 'cpu', reason="stream only runs on GPU.")
@unittest.skipIf(
F._default_context_str == "cpu", reason="stream only runs on GPU."
)
# borrowed from PyTorch, test/test_cuda.py: test_record_stream()
def test_record_stream_ndarray():
cycles_per_ms = _get_cycles_per_ms()
t = nd.array(np.array([1., 2., 3., 4.], dtype=np.float32), ctx=nd.cpu())
t = nd.array(np.array([1.0, 2.0, 3.0, 4.0], dtype=np.float32), ctx=nd.cpu())
t.pin_memory_()
result = nd.empty([4], ctx=nd.gpu(0))
stream = torch.cuda.Stream()
......@@ -84,25 +98,33 @@ def test_record_stream_ndarray():
tmp = t.copyto(nd.gpu(0))
ptr[0] = F.from_dgl_nd(tmp).data_ptr()
torch.cuda.current_stream().wait_stream(stream)
tmp.record_stream(
to_dgl_stream_handle(torch.cuda.current_stream()))
tmp.record_stream(to_dgl_stream_handle(torch.cuda.current_stream()))
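# record_stream marks tmp as in use on the current stream, so its memory block
# is not handed back to the allocator until that stream's queued work finishes.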
torch.cuda._sleep(int(50 * cycles_per_ms)) # delay the copy
result.copyfrom(tmp)
perform_copy()
with torch.cuda.stream(stream):
tmp2 = nd.empty([4], ctx=nd.gpu(0))
assert F.from_dgl_nd(tmp2).data_ptr() != ptr[0], 'allocation re-used too soon'
assert (
F.from_dgl_nd(tmp2).data_ptr() != ptr[0]
), "allocation re-used too soon"
assert torch.equal(F.from_dgl_nd(result).cpu(), torch.tensor([1., 2., 3., 4.]))
assert torch.equal(
F.from_dgl_nd(result).cpu(), torch.tensor([1.0, 2.0, 3.0, 4.0])
)
# Check that the block will be re-used after the main stream finishes
torch.cuda.current_stream().synchronize()
with torch.cuda.stream(stream):
tmp3 = nd.empty([4], ctx=nd.gpu(0))
assert F.from_dgl_nd(tmp3).data_ptr() == ptr[0], 'allocation not re-used'
assert (
F.from_dgl_nd(tmp3).data_ptr() == ptr[0]
), "allocation not re-used"
@unittest.skipIf(F._default_context_str == 'cpu', reason="stream only runs on GPU.")
@unittest.skipIf(
F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_record_stream_graph_positive():
cycles_per_ms = _get_cycles_per_ms()
......@@ -133,7 +155,10 @@ def test_record_stream_graph_positive():
torch.cuda.current_stream().synchronize()
assert torch.equal(result, results2)
@unittest.skipIf(F._default_context_str == 'cpu', reason="stream only runs on GPU.")
@unittest.skipIf(
F._default_context_str == "cpu", reason="stream only runs on GPU."
)
def test_record_stream_graph_negative():
cycles_per_ms = _get_cycles_per_ms()
......@@ -165,7 +190,8 @@ def test_record_stream_graph_negative():
torch.cuda.current_stream().synchronize()
assert not torch.equal(result, results2)
if __name__ == '__main__':
if __name__ == "__main__":
test_basics()
test_set_get_stream()
test_record_stream_ndarray()
......