"docs/vscode:/vscode.git/clone" did not exist on "5a3c9192fc0e635bc5dc0ad458c10bc1c1efbb55"
Unverified commit ea48ce7a, authored by Hongzhi (Steve), Chen and committed by GitHub

[Misc] Black auto fix. (#4697)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent bd3fe59e
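For context: this commit only applies Black's automatic formatting (double-quote normalization, 88-column line wrapping, trailing commas in exploded calls); it does not change behavior. The sketch below shows how such a fix can be reproduced locally, assuming Black's default mode — the exact configuration and the file paths used by the DGL CI are not shown on this page, so treat both as assumptions.

# Minimal sketch, assuming Black's default settings. On the command line it
# would be roughly:
#     pip install black
#     black tests/ tools/          # hypothetical paths, not taken from this diff
# The same rules are available programmatically:
import black

# A single-quoted line as it appeared before the fix (taken from the diff below).
src = "response = requests.get('{}wfapi'.format(job_link), verify=False).json()\n"

# format_str() applies the same rules as running `black` on a file:
# double quotes, 88-column wrapping, magic trailing commas.
print(black.format_str(src, mode=black.Mode()))
# -> response = requests.get("{}wfapi".format(job_link), verify=False).json()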
from urllib.parse import urlparse, urljoin
import os
import requests
import pytest
import json
import enum
from pathlib import Path
import json
import os
import tempfile
from pathlib import Path
from urllib.parse import urljoin, urlparse
import pytest
import requests
class JobStatus(enum.Enum):
......@@ -27,8 +28,8 @@ JENKINS_STATUS_MAPPING = {
assert "BUILD_URL" in os.environ, "Are you in the Jenkins environment?"
job_link = os.environ["BUILD_URL"]
response = requests.get('{}wfapi'.format(job_link), verify=False).json()
domain = '{uri.scheme}://{uri.netloc}/'.format(uri=urlparse(job_link))
response = requests.get("{}wfapi".format(job_link), verify=False).json()
domain = "{uri.scheme}://{uri.netloc}/".format(uri=urlparse(job_link))
stages = response["stages"]
final_dict = {}
......@@ -41,37 +42,38 @@ def get_jenkins_json(path):
for stage in stages:
link = stage['_links']['self']['href']
stage_name = stage['name']
link = stage["_links"]["self"]["href"]
stage_name = stage["name"]
res = requests.get(urljoin(domain, link), verify=False).json()
nodes = res['stageFlowNodes']
nodes = res["stageFlowNodes"]
for node in nodes:
nodes_dict[node['id']] = node
nodes_dict[node['id']]['stageName'] = stage_name
nodes_dict[node["id"]] = node
nodes_dict[node["id"]]["stageName"] = stage_name
def get_node_full_name(node, node_dict):
name = ""
while "parentNodes" in node:
name = name + "/" + node["name"]
id = node['parentNodes'][0]
id = node["parentNodes"][0]
if id in nodes_dict:
node = node_dict[id]
else:
break
return name
for key, node in nodes_dict.items():
logs = get_jenkins_json(
node['_links']['log']['href']).get('text', '')
node_name = node['name']
if "Post Actions" in node['stageName']:
logs = get_jenkins_json(node["_links"]["log"]["href"]).get("text", "")
node_name = node["name"]
if "Post Actions" in node["stageName"]:
continue
node_status = node['status']
id = node['id']
node_status = node["status"]
id = node["id"]
full_name = get_node_full_name(node, nodes_dict)
final_dict["{}_{}/{}".format(id, node['stageName'], full_name)] = {
final_dict["{}_{}/{}".format(id, node["stageName"], full_name)] = {
"status": JENKINS_STATUS_MAPPING[node_status],
"logs": logs
"logs": logs,
}
JOB_NAME = os.getenv("JOB_NAME")
......@@ -85,15 +87,18 @@ prefix = f"https://dgl-ci-result.s3.us-west-2.amazonaws.com/{JOB_NAME}/{BUILD_NU
def test_generate_report(test_name):
os.makedirs("./logs_dir/", exist_ok=True)
tmp = tempfile.NamedTemporaryFile(
mode='w', delete=False, suffix=".log", dir="./logs_dir/")
mode="w", delete=False, suffix=".log", dir="./logs_dir/"
)
tmp.write(final_dict[test_name]["logs"])
filename = Path(tmp.name).name
# print(final_dict[test_name]["logs"])
print("Log path: {}".format(prefix+filename))
print("Log path: {}".format(prefix + filename))
if final_dict[test_name]["status"] == JobStatus.FAIL:
pytest.fail(
"Test failed. Please see the log at {}".format(prefix+filename))
"Test failed. Please see the log at {}".format(prefix + filename)
)
elif final_dict[test_name]["status"] == JobStatus.SKIP:
pytest.skip(
"Test skipped. Please see the log at {}".format(prefix+filename))
"Test skipped. Please see the log at {}".format(prefix + filename)
)
import os
import requests
JOB_NAME = os.getenv("JOB_NAME")
BUILD_NUMBER = os.getenv("BUILD_NUMBER")
BUILD_ID = os.getenv("BUILD_ID")
COMMIT = os.getenv("GIT_COMMIT")
job_link = os.environ["BUILD_URL"]
response = requests.get('{}wfapi'.format(job_link), verify=False).json()
response = requests.get("{}wfapi".format(job_link), verify=False).json()
status = "✅ CI test succeeded"
for v in response['stages']:
if v['status'] in ['FAILED', 'ABORTED']:
status = "❌ CI test failed in Stage [{}].".format(v['name'])
for v in response["stages"]:
if v["status"] in ["FAILED", "ABORTED"]:
status = "❌ CI test failed in Stage [{}].".format(v["name"])
break
comment = f""" Commit ID: {COMMIT}\n
......
def test():
pass
if __name__ == "__main__":
test()
\ No newline at end of file
test()
import tensorflow as tf
from tensorflow.keras import layers
from copy import deepcopy
import backend as F
import networkx as nx
import numpy as np
import pytest
import scipy as sp
import tensorflow as tf
from tensorflow.keras import layers
from test_utils import parametrize_idtype
from test_utils.graph_cases import (
get_cases,
random_bipartite,
random_dglgraph,
random_graph,
)
import dgl
import dgl.nn.tensorflow as nn
import dgl.function as fn
import backend as F
from test_utils.graph_cases import get_cases, random_graph, random_bipartite, random_dglgraph
from test_utils import parametrize_idtype
from copy import deepcopy
import dgl.nn.tensorflow as nn
import numpy as np
import scipy as sp
def _AXWb(A, X, W, b):
X = tf.matmul(X, W)
Y = tf.reshape(tf.matmul(A, tf.reshape(X, (X.shape[0], -1))), X.shape)
return Y + b
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv(out_dim):
g = dgl.DGLGraph(nx.path_graph(3)).to(F.ctx())
ctx = F.ctx()
adj = tf.sparse.to_dense(tf.sparse.reorder(g.adjacency_matrix(transpose=True, ctx=ctx)))
adj = tf.sparse.to_dense(
tf.sparse.reorder(g.adjacency_matrix(transpose=True, ctx=ctx))
)
conv = nn.GraphConv(5, out_dim, norm='none', bias=True)
conv = nn.GraphConv(5, out_dim, norm="none", bias=True)
# conv = conv
print(conv)
# test#1: basic
......@@ -72,12 +82,16 @@ def test_graph_conv(out_dim):
# new_weight = conv.weight.data
# assert not F.allclose(old_weight, new_weight)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree', 'dglgraph']))
@pytest.mark.parametrize('norm', ['none', 'both', 'right', 'left'])
@pytest.mark.parametrize('weight', [True, False])
@pytest.mark.parametrize('bias', [True, False])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g",
get_cases(["homo", "block-bipartite"], exclude=["zero-degree", "dglgraph"]),
)
@pytest.mark.parametrize("norm", ["none", "both", "right", "left"])
@pytest.mark.parametrize("weight", [True, False])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv2(idtype, g, norm, weight, bias, out_dim):
g = g.astype(idtype).to(F.ctx())
conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias)
......@@ -92,12 +106,15 @@ def test_graph_conv2(idtype, g, norm, weight, bias, out_dim):
h_out = conv(g, h, weight=ext_w)
assert h_out.shape == (ndst, out_dim)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree', 'dglgraph']))
@pytest.mark.parametrize('norm', ['none', 'both', 'right'])
@pytest.mark.parametrize('weight', [True, False])
@pytest.mark.parametrize('bias', [True, False])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g", get_cases(["bipartite"], exclude=["zero-degree", "dglgraph"])
)
@pytest.mark.parametrize("norm", ["none", "both", "right"])
@pytest.mark.parametrize("weight", [True, False])
@pytest.mark.parametrize("bias", [True, False])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_graph_conv2_bi(idtype, g, norm, weight, bias, out_dim):
g = g.astype(idtype).to(F.ctx())
conv = nn.GraphConv(5, out_dim, norm=norm, weight=weight, bias=bias)
......@@ -112,6 +129,7 @@ def test_graph_conv2_bi(idtype, g, norm, weight, bias, out_dim):
h_out = conv(g, (h, h_dst), weight=ext_w)
assert h_out.shape == (ndst, out_dim)
def test_simple_pool():
ctx = F.ctx()
g = dgl.DGLGraph(nx.path_graph(15)).to(F.ctx())
......@@ -119,7 +137,7 @@ def test_simple_pool():
sum_pool = nn.SumPooling()
avg_pool = nn.AvgPooling()
max_pool = nn.MaxPooling()
sort_pool = nn.SortPooling(10) # k = 10
sort_pool = nn.SortPooling(10) # k = 10
print(sum_pool, avg_pool, max_pool, sort_pool)
# test#1: basic
......@@ -138,32 +156,48 @@ def test_simple_pool():
bg = dgl.batch([g, g_, g, g_, g])
h0 = F.randn((bg.number_of_nodes(), 5))
h1 = sum_pool(bg, h0)
truth = tf.stack([F.sum(h0[:15], 0),
F.sum(h0[15:20], 0),
F.sum(h0[20:35], 0),
F.sum(h0[35:40], 0),
F.sum(h0[40:55], 0)], 0)
truth = tf.stack(
[
F.sum(h0[:15], 0),
F.sum(h0[15:20], 0),
F.sum(h0[20:35], 0),
F.sum(h0[35:40], 0),
F.sum(h0[40:55], 0),
],
0,
)
assert F.allclose(h1, truth)
h1 = avg_pool(bg, h0)
truth = tf.stack([F.mean(h0[:15], 0),
F.mean(h0[15:20], 0),
F.mean(h0[20:35], 0),
F.mean(h0[35:40], 0),
F.mean(h0[40:55], 0)], 0)
truth = tf.stack(
[
F.mean(h0[:15], 0),
F.mean(h0[15:20], 0),
F.mean(h0[20:35], 0),
F.mean(h0[35:40], 0),
F.mean(h0[40:55], 0),
],
0,
)
assert F.allclose(h1, truth)
h1 = max_pool(bg, h0)
truth = tf.stack([F.max(h0[:15], 0),
F.max(h0[15:20], 0),
F.max(h0[20:35], 0),
F.max(h0[35:40], 0),
F.max(h0[40:55], 0)], 0)
truth = tf.stack(
[
F.max(h0[:15], 0),
F.max(h0[15:20], 0),
F.max(h0[20:35], 0),
F.max(h0[35:40], 0),
F.max(h0[40:55], 0),
],
0,
)
assert F.allclose(h1, truth)
h1 = sort_pool(bg, h0)
assert h1.shape[0] == 5 and h1.shape[1] == 10 * 5 and h1.ndim == 2
def test_glob_att_pool():
g = dgl.DGLGraph(nx.path_graph(10)).to(F.ctx())
......@@ -182,10 +216,12 @@ def test_glob_att_pool():
assert h1.shape[0] == 4 and h1.shape[1] == 10 and h1.ndim == 2
@pytest.mark.parametrize('O', [1, 2, 8])
@pytest.mark.parametrize("O", [1, 2, 8])
def test_rgcn(O):
etype = []
g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True).to(F.ctx())
g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1), readonly=True).to(
F.ctx()
)
# 5 etypes
R = 5
for i in range(g.number_of_edges()):
......@@ -262,10 +298,13 @@ def test_rgcn(O):
assert list(h_new_low.shape) == [100, O]
assert F.allclose(h_new, h_new_low)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize('num_heads', [1, 4])
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_gat_conv(g, idtype, out_dim, num_heads):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
......@@ -280,24 +319,29 @@ def test_gat_conv(g, idtype, out_dim, num_heads):
gat = nn.GATConv(5, out_dim, num_heads, residual=True)
h = gat(g, feat)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize('num_heads', [1, 4])
@pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
@pytest.mark.parametrize("num_heads", [1, 4])
def test_gat_conv_bi(g, idtype, out_dim, num_heads):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
gat = nn.GATConv(5, out_dim, num_heads)
feat = (F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)))
feat = (
F.randn((g.number_of_src_nodes(), 5)),
F.randn((g.number_of_dst_nodes(), 5)),
)
h = gat(g, feat)
assert h.shape == (g.number_of_dst_nodes(), num_heads, out_dim)
_, a = gat(g, feat, get_attention=True)
assert a.shape == (g.number_of_edges(), num_heads, 1)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite']))
@pytest.mark.parametrize('aggre_type', ['mean', 'pool', 'gcn'])
@pytest.mark.parametrize('out_dim', [1, 10])
@pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"]))
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 10])
def test_sage_conv(idtype, g, aggre_type, out_dim):
g = g.astype(idtype).to(F.ctx())
sage = nn.SAGEConv(5, out_dim, aggre_type)
......@@ -305,41 +349,49 @@ def test_sage_conv(idtype, g, aggre_type, out_dim):
h = sage(g, feat)
assert h.shape[-1] == out_dim
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite']))
@pytest.mark.parametrize('aggre_type', ['mean', 'pool', 'gcn'])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("g", get_cases(["bipartite"]))
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sage_conv_bi(idtype, g, aggre_type, out_dim):
g = g.astype(idtype).to(F.ctx())
dst_dim = 5 if aggre_type != 'gcn' else 10
dst_dim = 5 if aggre_type != "gcn" else 10
sage = nn.SAGEConv((10, dst_dim), out_dim, aggre_type)
feat = (F.randn((g.number_of_src_nodes(), 10)), F.randn((g.number_of_dst_nodes(), dst_dim)))
feat = (
F.randn((g.number_of_src_nodes(), 10)),
F.randn((g.number_of_dst_nodes(), dst_dim)),
)
h = sage(g, feat)
assert h.shape[-1] == out_dim
assert h.shape[0] == g.number_of_dst_nodes()
@parametrize_idtype
@pytest.mark.parametrize('aggre_type', ['mean', 'pool', 'gcn'])
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("aggre_type", ["mean", "pool", "gcn"])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sage_conv_bi_empty(idtype, aggre_type, out_dim):
# Test the case for graphs without edges
g = dgl.heterograph({('_U', '_E', '_V'): ([], [])}, {'_U': 5, '_V': 3}).to(F.ctx())
g = dgl.heterograph({("_U", "_E", "_V"): ([], [])}, {"_U": 5, "_V": 3}).to(
F.ctx()
)
g = g.astype(idtype).to(F.ctx())
sage = nn.SAGEConv((3, 3), out_dim, 'gcn')
sage = nn.SAGEConv((3, 3), out_dim, "gcn")
feat = (F.randn((5, 3)), F.randn((3, 3)))
h = sage(g, feat)
assert h.shape[-1] == out_dim
assert h.shape[0] == 3
for aggre_type in ['mean', 'pool', 'lstm']:
for aggre_type in ["mean", "pool", "lstm"]:
sage = nn.SAGEConv((3, 1), out_dim, aggre_type)
feat = (F.randn((5, 3)), F.randn((3, 1)))
h = sage(g, feat)
assert h.shape[-1] == out_dim
assert h.shape[0] == 3
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
def test_sgc_conv(g, idtype, out_dim):
ctx = F.ctx()
g = g.astype(idtype).to(ctx)
......@@ -357,8 +409,9 @@ def test_sgc_conv(g, idtype, out_dim):
assert F.allclose(h_0, h_1)
assert h_0.shape[-1] == out_dim
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo'], exclude=['zero-degree']))
@pytest.mark.parametrize("g", get_cases(["homo"], exclude=["zero-degree"]))
def test_appnp_conv(g, idtype):
ctx = F.ctx()
g = g.astype(idtype).to(ctx)
......@@ -368,36 +421,38 @@ def test_appnp_conv(g, idtype):
h = appnp(g, feat)
assert h.shape[-1] == 5
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite']))
@pytest.mark.parametrize('aggregator_type', ['mean', 'max', 'sum'])
@pytest.mark.parametrize("g", get_cases(["homo", "block-bipartite"]))
@pytest.mark.parametrize("aggregator_type", ["mean", "max", "sum"])
def test_gin_conv(g, idtype, aggregator_type):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
gin = nn.GINConv(
tf.keras.layers.Dense(12),
aggregator_type
)
gin = nn.GINConv(tf.keras.layers.Dense(12), aggregator_type)
feat = F.randn((g.number_of_src_nodes(), 5))
h = gin(g, feat)
assert h.shape == (g.number_of_dst_nodes(), 12)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite']))
@pytest.mark.parametrize('aggregator_type', ['mean', 'max', 'sum'])
@pytest.mark.parametrize("g", get_cases(["bipartite"]))
@pytest.mark.parametrize("aggregator_type", ["mean", "max", "sum"])
def test_gin_conv_bi(g, idtype, aggregator_type):
g = g.astype(idtype).to(F.ctx())
gin = nn.GINConv(
tf.keras.layers.Dense(12),
aggregator_type
gin = nn.GINConv(tf.keras.layers.Dense(12), aggregator_type)
feat = (
F.randn((g.number_of_src_nodes(), 5)),
F.randn((g.number_of_dst_nodes(), 5)),
)
feat = (F.randn((g.number_of_src_nodes(), 5)), F.randn((g.number_of_dst_nodes(), 5)))
h = gin(g, feat)
assert h.shape == (g.number_of_dst_nodes(), 12)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['homo', 'block-bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize(
"g", get_cases(["homo", "block-bipartite"], exclude=["zero-degree"])
)
@pytest.mark.parametrize("out_dim", [1, 2])
def test_edge_conv(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
edge_conv = nn.EdgeConv(out_dim)
......@@ -406,9 +461,10 @@ def test_edge_conv(g, idtype, out_dim):
h1 = edge_conv(g, h0)
assert h1.shape == (g.number_of_dst_nodes(), out_dim)
@parametrize_idtype
@pytest.mark.parametrize('g', get_cases(['bipartite'], exclude=['zero-degree']))
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("g", get_cases(["bipartite"], exclude=["zero-degree"]))
@pytest.mark.parametrize("out_dim", [1, 2])
def test_edge_conv_bi(g, idtype, out_dim):
g = g.astype(idtype).to(F.ctx())
ctx = F.ctx()
......@@ -419,56 +475,73 @@ def test_edge_conv_bi(g, idtype, out_dim):
h1 = edge_conv(g, (h0, x0))
assert h1.shape == (g.number_of_dst_nodes(), out_dim)
def myagg(alist, dsttype):
rst = alist[0]
for i in range(1, len(alist)):
rst = rst + (i + 1) * alist[i]
return rst
@parametrize_idtype
@pytest.mark.parametrize('agg', ['sum', 'max', 'min', 'mean', 'stack', myagg])
@pytest.mark.parametrize("agg", ["sum", "max", "min", "mean", "stack", myagg])
def test_hetero_conv(agg, idtype):
g = dgl.heterograph({
('user', 'follows', 'user'): ([0, 0, 2, 1], [1, 2, 1, 3]),
('user', 'plays', 'game'): ([0, 0, 0, 1, 2], [0, 2, 3, 0, 2]),
('store', 'sells', 'game'): ([0, 0, 1, 1], [0, 3, 1, 2])},
idtype=idtype, device=F.ctx())
conv = nn.HeteroGraphConv({
'follows': nn.GraphConv(2, 3, allow_zero_in_degree=True),
'plays': nn.GraphConv(2, 4, allow_zero_in_degree=True),
'sells': nn.GraphConv(3, 4, allow_zero_in_degree=True)},
agg)
g = dgl.heterograph(
{
("user", "follows", "user"): ([0, 0, 2, 1], [1, 2, 1, 3]),
("user", "plays", "game"): ([0, 0, 0, 1, 2], [0, 2, 3, 0, 2]),
("store", "sells", "game"): ([0, 0, 1, 1], [0, 3, 1, 2]),
},
idtype=idtype,
device=F.ctx(),
)
conv = nn.HeteroGraphConv(
{
"follows": nn.GraphConv(2, 3, allow_zero_in_degree=True),
"plays": nn.GraphConv(2, 4, allow_zero_in_degree=True),
"sells": nn.GraphConv(3, 4, allow_zero_in_degree=True),
},
agg,
)
uf = F.randn((4, 2))
gf = F.randn((4, 4))
sf = F.randn((2, 3))
h = conv(g, {'user': uf, 'store': sf, 'game': gf})
assert set(h.keys()) == {'user', 'game'}
if agg != 'stack':
assert h['user'].shape == (4, 3)
assert h['game'].shape == (4, 4)
h = conv(g, {"user": uf, "store": sf, "game": gf})
assert set(h.keys()) == {"user", "game"}
if agg != "stack":
assert h["user"].shape == (4, 3)
assert h["game"].shape == (4, 4)
else:
assert h['user'].shape == (4, 1, 3)
assert h['game'].shape == (4, 2, 4)
block = dgl.to_block(g.to(F.cpu()), {'user': [0, 1, 2, 3], 'game': [0, 1, 2, 3], 'store': []}).to(F.ctx())
h = conv(block, ({'user': uf, 'game': gf, 'store': sf}, {'user': uf, 'game': gf, 'store': sf[0:0]}))
assert set(h.keys()) == {'user', 'game'}
if agg != 'stack':
assert h['user'].shape == (4, 3)
assert h['game'].shape == (4, 4)
assert h["user"].shape == (4, 1, 3)
assert h["game"].shape == (4, 2, 4)
block = dgl.to_block(
g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []}
).to(F.ctx())
h = conv(
block,
(
{"user": uf, "game": gf, "store": sf},
{"user": uf, "game": gf, "store": sf[0:0]},
),
)
assert set(h.keys()) == {"user", "game"}
if agg != "stack":
assert h["user"].shape == (4, 3)
assert h["game"].shape == (4, 4)
else:
assert h['user'].shape == (4, 1, 3)
assert h['game'].shape == (4, 2, 4)
h = conv(block, {'user': uf, 'game': gf, 'store': sf})
assert set(h.keys()) == {'user', 'game'}
if agg != 'stack':
assert h['user'].shape == (4, 3)
assert h['game'].shape == (4, 4)
assert h["user"].shape == (4, 1, 3)
assert h["game"].shape == (4, 2, 4)
h = conv(block, {"user": uf, "game": gf, "store": sf})
assert set(h.keys()) == {"user", "game"}
if agg != "stack":
assert h["user"].shape == (4, 3)
assert h["game"].shape == (4, 4)
else:
assert h['user'].shape == (4, 1, 3)
assert h['game'].shape == (4, 2, 4)
assert h["user"].shape == (4, 1, 3)
assert h["game"].shape == (4, 2, 4)
# test with mod args
class MyMod(tf.keras.layers.Layer):
......@@ -478,23 +551,28 @@ def test_hetero_conv(agg, idtype):
self.carg2 = 0
self.s1 = s1
self.s2 = s2
def call(self, g, h, arg1=None, *, arg2=None):
if arg1 is not None:
self.carg1 += 1
if arg2 is not None:
self.carg2 += 1
return tf.zeros((g.number_of_dst_nodes(), self.s2))
mod1 = MyMod(2, 3)
mod2 = MyMod(2, 4)
mod3 = MyMod(3, 4)
conv = nn.HeteroGraphConv({
'follows': mod1,
'plays': mod2,
'sells': mod3},
agg)
mod_args = {'follows' : (1,), 'plays' : (1,)}
mod_kwargs = {'sells' : {'arg2' : 'abc'}}
h = conv(g, {'user' : uf, 'game': gf, 'store' : sf}, mod_args=mod_args, mod_kwargs=mod_kwargs)
conv = nn.HeteroGraphConv(
{"follows": mod1, "plays": mod2, "sells": mod3}, agg
)
mod_args = {"follows": (1,), "plays": (1,)}
mod_kwargs = {"sells": {"arg2": "abc"}}
h = conv(
g,
{"user": uf, "game": gf, "store": sf},
mod_args=mod_args,
mod_kwargs=mod_kwargs,
)
assert mod1.carg1 == 1
assert mod1.carg2 == 0
assert mod2.carg1 == 1
......@@ -502,28 +580,38 @@ def test_hetero_conv(agg, idtype):
assert mod3.carg1 == 0
assert mod3.carg2 == 1
#conv on graph without any edges
# conv on graph without any edges
for etype in g.etypes:
g = dgl.remove_edges(g, g.edges(form='eid', etype=etype), etype=etype)
g = dgl.remove_edges(g, g.edges(form="eid", etype=etype), etype=etype)
assert g.num_edges() == 0
h = conv(g, {'user': uf, 'game': gf, 'store': sf})
assert set(h.keys()) == {'user', 'game'}
block = dgl.to_block(g.to(F.cpu()), {'user': [0, 1, 2, 3], 'game': [
0, 1, 2, 3], 'store': []}).to(F.ctx())
h = conv(block, ({'user': uf, 'game': gf, 'store': sf},
{'user': uf, 'game': gf, 'store': sf[0:0]}))
assert set(h.keys()) == {'user', 'game'}
h = conv(g, {"user": uf, "game": gf, "store": sf})
assert set(h.keys()) == {"user", "game"}
block = dgl.to_block(
g.to(F.cpu()), {"user": [0, 1, 2, 3], "game": [0, 1, 2, 3], "store": []}
).to(F.ctx())
h = conv(
block,
(
{"user": uf, "game": gf, "store": sf},
{"user": uf, "game": gf, "store": sf[0:0]},
),
)
assert set(h.keys()) == {"user", "game"}
@pytest.mark.parametrize('out_dim', [1, 2])
@pytest.mark.parametrize("out_dim", [1, 2])
def test_dense_cheb_conv(out_dim):
for k in range(3, 4):
ctx = F.ctx()
g = dgl.DGLGraph(sp.sparse.random(100, 100, density=0.1, random_state=42))
g = dgl.DGLGraph(
sp.sparse.random(100, 100, density=0.1, random_state=42)
)
g = g.to(ctx)
adj = tf.sparse.to_dense(tf.sparse.reorder(g.adjacency_matrix(transpose=True, ctx=ctx)))
adj = tf.sparse.to_dense(
tf.sparse.reorder(g.adjacency_matrix(transpose=True, ctx=ctx))
)
cheb = nn.ChebConv(5, out_dim, k, None, bias=True)
dense_cheb = nn.DenseChebConv(5, out_dim, k, bias=True)
......@@ -540,7 +628,7 @@ def test_dense_cheb_conv(out_dim):
assert F.allclose(out_cheb, out_dense_cheb)
if __name__ == '__main__':
if __name__ == "__main__":
test_graph_conv()
# test_set2set()
test_glob_att_pool()
......
import pytest
import backend as F
import pytest
parametrize_idtype = pytest.mark.parametrize("idtype", [F.int32, F.int64])
......
import dgl
import backend as F
__all__ = ['check_graph_equal']
import dgl
__all__ = ["check_graph_equal"]
def check_graph_equal(g1, g2, *,
check_idtype=True,
check_feature=True):
def check_graph_equal(g1, g2, *, check_idtype=True, check_feature=True):
assert g1.device == g2.device
if check_idtype:
assert g1.idtype == g2.idtype
......@@ -26,8 +26,8 @@ def check_graph_equal(g1, g2, *,
for ety in g1.canonical_etypes:
assert g1.number_of_edges(ety) == g2.number_of_edges(ety)
assert F.allclose(g1.batch_num_edges(ety), g2.batch_num_edges(ety))
src1, dst1, eid1 = g1.edges(etype=ety, form='all')
src2, dst2, eid2 = g2.edges(etype=ety, form='all')
src1, dst1, eid1 = g1.edges(etype=ety, form="all")
src2, dst2, eid2 = g2.edges(etype=ety, form="all")
if check_idtype:
assert F.allclose(src1, src2)
assert F.allclose(dst1, dst2)
......@@ -42,9 +42,13 @@ def check_graph_equal(g1, g2, *,
if g1.number_of_nodes(nty) == 0:
continue
for feat_name in g1.nodes[nty].data.keys():
assert F.allclose(g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name])
assert F.allclose(
g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name]
)
for ety in g1.canonical_etypes:
if g1.number_of_edges(ety) == 0:
continue
for feat_name in g2.edges[ety].data.keys():
assert F.allclose(g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name])
assert F.allclose(
g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name]
)
......@@ -3,14 +3,15 @@ import os
import tempfile
import unittest
import dgl
import numpy as np
import pytest
import torch
from chunk_graph import chunk_graph
from create_chunked_dataset import create_chunked_dataset
import dgl
from dgl.data.utils import load_graphs, load_tensors
from create_chunked_dataset import create_chunked_dataset
@pytest.mark.parametrize("num_chunks", [1, 8])
def test_chunk_graph(num_chunks):
......@@ -19,43 +20,43 @@ def test_chunk_graph(num_chunks):
g = create_chunked_dataset(root_dir, num_chunks, include_edge_data=True)
num_cite_edges = g.number_of_edges('cites')
num_write_edges = g.number_of_edges('writes')
num_affiliate_edges = g.number_of_edges('affiliated_with')
num_cite_edges = g.number_of_edges("cites")
num_write_edges = g.number_of_edges("writes")
num_affiliate_edges = g.number_of_edges("affiliated_with")
num_institutions = g.number_of_nodes('institution')
num_authors = g.number_of_nodes('author')
num_papers = g.number_of_nodes('paper')
num_institutions = g.number_of_nodes("institution")
num_authors = g.number_of_nodes("author")
num_papers = g.number_of_nodes("paper")
# check metadata.json
output_dir = os.path.join(root_dir, 'chunked-data')
json_file = os.path.join(output_dir, 'metadata.json')
output_dir = os.path.join(root_dir, "chunked-data")
json_file = os.path.join(output_dir, "metadata.json")
assert os.path.isfile(json_file)
with open(json_file, 'rb') as f:
with open(json_file, "rb") as f:
meta_data = json.load(f)
assert meta_data['graph_name'] == 'mag240m'
assert len(meta_data['num_nodes_per_chunk'][0]) == num_chunks
assert meta_data["graph_name"] == "mag240m"
assert len(meta_data["num_nodes_per_chunk"][0]) == num_chunks
# check edge_index
output_edge_index_dir = os.path.join(output_dir, 'edge_index')
output_edge_index_dir = os.path.join(output_dir, "edge_index")
for utype, etype, vtype in g.canonical_etypes:
fname = ':'.join([utype, etype, vtype])
fname = ":".join([utype, etype, vtype])
for i in range(num_chunks):
chunk_f_name = os.path.join(
output_edge_index_dir, fname + str(i) + '.txt'
output_edge_index_dir, fname + str(i) + ".txt"
)
assert os.path.isfile(chunk_f_name)
with open(chunk_f_name, 'r') as f:
with open(chunk_f_name, "r") as f:
header = f.readline()
num1, num2 = header.rstrip().split(' ')
num1, num2 = header.rstrip().split(" ")
assert isinstance(int(num1), int)
assert isinstance(int(num2), int)
# check node_data
output_node_data_dir = os.path.join(output_dir, 'node_data', 'paper')
for feat in ['feat', 'label', 'year']:
output_node_data_dir = os.path.join(output_dir, "node_data", "paper")
for feat in ["feat", "label", "year"]:
for i in range(num_chunks):
chunk_f_name = '{}-{}.npy'.format(feat, i)
chunk_f_name = "{}-{}.npy".format(feat, i)
chunk_f_name = os.path.join(output_node_data_dir, chunk_f_name)
assert os.path.isfile(chunk_f_name)
feat_array = np.load(chunk_f_name)
......@@ -63,19 +64,19 @@ def test_chunk_graph(num_chunks):
# check edge_data
num_edges = {
'paper:cites:paper': num_cite_edges,
'author:writes:paper': num_write_edges,
'paper:rev_writes:author': num_write_edges,
"paper:cites:paper": num_cite_edges,
"author:writes:paper": num_write_edges,
"paper:rev_writes:author": num_write_edges,
}
output_edge_data_dir = os.path.join(output_dir, 'edge_data')
output_edge_data_dir = os.path.join(output_dir, "edge_data")
for etype, feat in [
['paper:cites:paper', 'count'],
['author:writes:paper', 'year'],
['paper:rev_writes:author', 'year'],
["paper:cites:paper", "count"],
["author:writes:paper", "year"],
["paper:rev_writes:author", "year"],
]:
output_edge_sub_dir = os.path.join(output_edge_data_dir, etype)
for i in range(num_chunks):
chunk_f_name = '{}-{}.npy'.format(feat, i)
chunk_f_name = "{}-{}.npy".format(feat, i)
chunk_f_name = os.path.join(output_edge_sub_dir, chunk_f_name)
assert os.path.isfile(chunk_f_name)
feat_array = np.load(chunk_f_name)
......@@ -100,63 +101,63 @@ def test_part_pipeline(num_chunks, num_parts):
all_ntypes = g.ntypes
all_etypes = g.etypes
num_cite_edges = g.number_of_edges('cites')
num_write_edges = g.number_of_edges('writes')
num_affiliate_edges = g.number_of_edges('affiliated_with')
num_cite_edges = g.number_of_edges("cites")
num_write_edges = g.number_of_edges("writes")
num_affiliate_edges = g.number_of_edges("affiliated_with")
num_institutions = g.number_of_nodes('institution')
num_authors = g.number_of_nodes('author')
num_papers = g.number_of_nodes('paper')
num_institutions = g.number_of_nodes("institution")
num_authors = g.number_of_nodes("author")
num_papers = g.number_of_nodes("paper")
# Step1: graph partition
in_dir = os.path.join(root_dir, 'chunked-data')
output_dir = os.path.join(root_dir, 'parted_data')
in_dir = os.path.join(root_dir, "chunked-data")
output_dir = os.path.join(root_dir, "parted_data")
os.system(
'python3 tools/partition_algo/random_partition.py '
'--in_dir {} --out_dir {} --num_partitions {}'.format(
"python3 tools/partition_algo/random_partition.py "
"--in_dir {} --out_dir {} --num_partitions {}".format(
in_dir, output_dir, num_parts
)
)
for ntype in ['author', 'institution', 'paper']:
fname = os.path.join(output_dir, '{}.txt'.format(ntype))
with open(fname, 'r') as f:
for ntype in ["author", "institution", "paper"]:
fname = os.path.join(output_dir, "{}.txt".format(ntype))
with open(fname, "r") as f:
header = f.readline().rstrip()
assert isinstance(int(header), int)
# Step2: data dispatch
partition_dir = os.path.join(root_dir, 'parted_data')
out_dir = os.path.join(root_dir, 'partitioned')
ip_config = os.path.join(root_dir, 'ip_config.txt')
with open(ip_config, 'w') as f:
partition_dir = os.path.join(root_dir, "parted_data")
out_dir = os.path.join(root_dir, "partitioned")
ip_config = os.path.join(root_dir, "ip_config.txt")
with open(ip_config, "w") as f:
for i in range(num_parts):
f.write(f'127.0.0.{i + 1}\n')
cmd = 'python3 tools/dispatch_data.py'
cmd += f' --in-dir {in_dir}'
cmd += f' --partitions-dir {partition_dir}'
cmd += f' --out-dir {out_dir}'
cmd += f' --ip-config {ip_config}'
cmd += ' --process-group-timeout 60'
cmd += ' --save-orig-nids'
cmd += ' --save-orig-eids'
f.write(f"127.0.0.{i + 1}\n")
cmd = "python3 tools/dispatch_data.py"
cmd += f" --in-dir {in_dir}"
cmd += f" --partitions-dir {partition_dir}"
cmd += f" --out-dir {out_dir}"
cmd += f" --ip-config {ip_config}"
cmd += " --process-group-timeout 60"
cmd += " --save-orig-nids"
cmd += " --save-orig-eids"
os.system(cmd)
# check metadata.json
meta_fname = os.path.join(out_dir, 'metadata.json')
with open(meta_fname, 'rb') as f:
meta_fname = os.path.join(out_dir, "metadata.json")
with open(meta_fname, "rb") as f:
meta_data = json.load(f)
for etype in all_etypes:
assert len(meta_data['edge_map'][etype]) == num_parts
assert meta_data['etypes'].keys() == set(all_etypes)
assert meta_data['graph_name'] == 'mag240m'
assert len(meta_data["edge_map"][etype]) == num_parts
assert meta_data["etypes"].keys() == set(all_etypes)
assert meta_data["graph_name"] == "mag240m"
for ntype in all_ntypes:
assert len(meta_data['node_map'][ntype]) == num_parts
assert meta_data['ntypes'].keys() == set(all_ntypes)
assert meta_data['num_edges'] == g.num_edges()
assert meta_data['num_nodes'] == g.num_nodes()
assert meta_data['num_parts'] == num_parts
assert len(meta_data["node_map"][ntype]) == num_parts
assert meta_data["ntypes"].keys() == set(all_ntypes)
assert meta_data["num_edges"] == g.num_edges()
assert meta_data["num_nodes"] == g.num_nodes()
assert meta_data["num_parts"] == num_parts
edge_dict = {}
edge_data_gold = {}
......@@ -165,7 +166,7 @@ def test_part_pipeline(num_chunks, num_parts):
# Create Id Map here.
num_edges = 0
for utype, etype, vtype in g.canonical_etypes:
fname = ':'.join([utype, etype, vtype])
fname = ":".join([utype, etype, vtype])
edge_dict[fname] = np.array(
[num_edges, num_edges + g.number_of_edges(etype)]
).reshape(1, 2)
......@@ -177,21 +178,21 @@ def test_part_pipeline(num_chunks, num_parts):
# check edge_data
num_edges = {
'paper:cites:paper': num_cite_edges,
'author:writes:paper': num_write_edges,
'paper:rev_writes:author': num_write_edges,
"paper:cites:paper": num_cite_edges,
"author:writes:paper": num_write_edges,
"paper:rev_writes:author": num_write_edges,
}
output_dir = os.path.join(root_dir, 'chunked-data')
output_edge_data_dir = os.path.join(output_dir, 'edge_data')
output_dir = os.path.join(root_dir, "chunked-data")
output_edge_data_dir = os.path.join(output_dir, "edge_data")
for etype, feat in [
['paper:cites:paper', 'count'],
['author:writes:paper', 'year'],
['paper:rev_writes:author', 'year'],
["paper:cites:paper", "count"],
["author:writes:paper", "year"],
["paper:rev_writes:author", "year"],
]:
output_edge_sub_dir = os.path.join(output_edge_data_dir, etype)
features = []
for i in range(num_chunks):
chunk_f_name = '{}-{}.npy'.format(feat, i)
chunk_f_name = "{}-{}.npy".format(feat, i)
chunk_f_name = os.path.join(
output_edge_sub_dir, chunk_f_name
)
......@@ -199,54 +200,54 @@ def test_part_pipeline(num_chunks, num_parts):
feat_array = np.load(chunk_f_name)
assert feat_array.shape[0] == num_edges[etype] // num_chunks
features.append(feat_array)
edge_data_gold[etype + '/' + feat] = np.concatenate(features)
edge_data_gold[etype + "/" + feat] = np.concatenate(features)
for i in range(num_parts):
sub_dir = 'part-' + str(i)
sub_dir = "part-" + str(i)
assert meta_data[sub_dir][
'node_feats'
] == 'part{}/node_feat.dgl'.format(i)
"node_feats"
] == "part{}/node_feat.dgl".format(i)
assert meta_data[sub_dir][
'edge_feats'
] == 'part{}/edge_feat.dgl'.format(i)
"edge_feats"
] == "part{}/edge_feat.dgl".format(i)
assert meta_data[sub_dir][
'part_graph'
] == 'part{}/graph.dgl'.format(i)
"part_graph"
] == "part{}/graph.dgl".format(i)
# check data
sub_dir = os.path.join(out_dir, 'part' + str(i))
sub_dir = os.path.join(out_dir, "part" + str(i))
# graph.dgl
fname = os.path.join(sub_dir, 'graph.dgl')
fname = os.path.join(sub_dir, "graph.dgl")
assert os.path.isfile(fname)
g_list, data_dict = load_graphs(fname)
part_g = g_list[0]
assert isinstance(part_g, dgl.DGLGraph)
# node_feat.dgl
fname = os.path.join(sub_dir, 'node_feat.dgl')
fname = os.path.join(sub_dir, "node_feat.dgl")
assert os.path.isfile(fname)
tensor_dict = load_tensors(fname)
all_tensors = [
'paper/feat',
'paper/label',
'paper/year',
'paper/orig_ids',
"paper/feat",
"paper/label",
"paper/year",
"paper/orig_ids",
]
assert tensor_dict.keys() == set(all_tensors)
for key in all_tensors:
assert isinstance(tensor_dict[key], torch.Tensor)
ndata_paper_orig_ids = tensor_dict['paper/orig_ids']
ndata_paper_orig_ids = tensor_dict["paper/orig_ids"]
# orig_nids.dgl
fname = os.path.join(sub_dir, 'orig_nids.dgl')
fname = os.path.join(sub_dir, "orig_nids.dgl")
assert os.path.isfile(fname)
orig_nids = load_tensors(fname)
assert len(orig_nids.keys()) == 3
assert torch.equal(ndata_paper_orig_ids, orig_nids['paper'])
assert torch.equal(ndata_paper_orig_ids, orig_nids["paper"])
# orig_eids.dgl
fname = os.path.join(sub_dir, 'orig_eids.dgl')
fname = os.path.join(sub_dir, "orig_eids.dgl")
assert os.path.isfile(fname)
orig_eids = load_tensors(fname)
assert len(orig_eids.keys()) == 4
......@@ -254,13 +255,13 @@ def test_part_pipeline(num_chunks, num_parts):
if include_edge_data:
# Read edge_feat.dgl
fname = os.path.join(sub_dir, 'edge_feat.dgl')
fname = os.path.join(sub_dir, "edge_feat.dgl")
assert os.path.isfile(fname)
tensor_dict = load_tensors(fname)
all_tensors = [
'paper:cites:paper/count',
'author:writes:paper/year',
'paper:rev_writes:author/year',
"paper:cites:paper/count",
"author:writes:paper/year",
"paper:rev_writes:author/year",
]
assert tensor_dict.keys() == set(all_tensors)
for key in all_tensors:
......
import unittest
import json
import tempfile
import os
import tempfile
import unittest
from launch import *
class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
"""wrap_udf_in_torch_dist_launcher()"""
......@@ -18,14 +20,18 @@ class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
master_addr="127.0.0.1",
master_port=1234,
)
expected = "python3.7 -m torch.distributed.launch " \
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 " \
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
expected = (
"python3.7 -m torch.distributed.launch "
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
)
self.assertEqual(wrapped_udf_command, expected)
def test_chained_udf(self):
# test that a chained udf_command is properly handled
udf_command = "cd path/to && python3.7 path/to/some/trainer.py arg1 arg2"
udf_command = (
"cd path/to && python3.7 path/to/some/trainer.py arg1 arg2"
)
wrapped_udf_command = wrap_udf_in_torch_dist_launcher(
udf_command=udf_command,
num_trainers=2,
......@@ -34,15 +40,21 @@ class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
master_addr="127.0.0.1",
master_port=1234,
)
expected = "cd path/to && python3.7 -m torch.distributed.launch " \
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 " \
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
expected = (
"cd path/to && python3.7 -m torch.distributed.launch "
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
)
self.assertEqual(wrapped_udf_command, expected)
def test_py_versions(self):
# test that this correctly handles different py versions/binaries
py_binaries = (
"python3.7", "python3.8", "python3.9", "python3", "python"
"python3.7",
"python3.8",
"python3.9",
"python3",
"python",
)
udf_command = "{python_bin} path/to/some/trainer.py arg1 arg2"
......@@ -55,9 +67,13 @@ class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
master_addr="127.0.0.1",
master_port=1234,
)
expected = "{python_bin} -m torch.distributed.launch ".format(python_bin=py_bin) + \
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 " \
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
expected = (
"{python_bin} -m torch.distributed.launch ".format(
python_bin=py_bin
)
+ "--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
)
self.assertEqual(wrapped_udf_command, expected)
......@@ -67,12 +83,13 @@ class TestWrapCmdWithLocalEnvvars(unittest.TestCase):
def test_simple(self):
self.assertEqual(
wrap_cmd_with_local_envvars("ls && pwd", "VAR1=value1 VAR2=value2"),
"(export VAR1=value1 VAR2=value2; ls && pwd)"
"(export VAR1=value1 VAR2=value2; ls && pwd)",
)
class TestConstructDglServerEnvVars(unittest.TestCase):
"""construct_dgl_server_env_vars()"""
def test_simple(self):
self.assertEqual(
construct_dgl_server_env_vars(
......@@ -83,7 +100,7 @@ class TestConstructDglServerEnvVars(unittest.TestCase):
ip_config="path/to/ip.config",
num_servers=5,
graph_format="csc",
keep_alive=False
keep_alive=False,
),
(
"DGL_ROLE=server "
......@@ -95,12 +112,13 @@ class TestConstructDglServerEnvVars(unittest.TestCase):
"DGL_NUM_SERVER=5 "
"DGL_GRAPH_FORMAT=csc "
"DGL_KEEP_ALIVE=0 "
)
),
)
class TestConstructDglClientEnvVars(unittest.TestCase):
"""construct_dgl_client_env_vars()"""
def test_simple(self):
# with pythonpath
self.assertEqual(
......@@ -113,7 +131,7 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
graph_format="csc",
num_omp_threads=4,
group_id=0,
pythonpath="some/pythonpath/"
pythonpath="some/pythonpath/",
),
(
"DGL_DIST_MODE=distributed "
......@@ -127,7 +145,7 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
"OMP_NUM_THREADS=4 "
"DGL_GROUP_ID=0 "
"PYTHONPATH=some/pythonpath/ "
)
),
)
# without pythonpath
self.assertEqual(
......@@ -139,7 +157,7 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
num_servers=3,
graph_format="csc",
num_omp_threads=4,
group_id=0
group_id=0,
),
(
"DGL_DIST_MODE=distributed "
......@@ -152,64 +170,72 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
"DGL_GRAPH_FORMAT=csc "
"OMP_NUM_THREADS=4 "
"DGL_GROUP_ID=0 "
)
),
)
def test_submit_jobs():
class Args():
class Args:
pass
args = Args()
with tempfile.TemporaryDirectory() as test_dir:
num_machines = 8
ip_config = os.path.join(test_dir, 'ip_config.txt')
with open(ip_config, 'w') as f:
ip_config = os.path.join(test_dir, "ip_config.txt")
with open(ip_config, "w") as f:
for i in range(num_machines):
f.write('{} {}\n'.format('127.0.0.'+str(i), 30050))
part_config = os.path.join(test_dir, 'ogb-products.json')
with open(part_config, 'w') as f:
json.dump({'num_parts': num_machines}, f)
f.write("{} {}\n".format("127.0.0." + str(i), 30050))
part_config = os.path.join(test_dir, "ogb-products.json")
with open(part_config, "w") as f:
json.dump({"num_parts": num_machines}, f)
args.num_trainers = 8
args.num_samplers = 1
args.num_servers = 4
args.workspace = test_dir
args.part_config = 'ogb-products.json'
args.ip_config = 'ip_config.txt'
args.server_name = 'ogb-products'
args.part_config = "ogb-products.json"
args.ip_config = "ip_config.txt"
args.server_name = "ogb-products"
args.keep_alive = False
args.num_server_threads = 1
args.graph_format = 'csc'
args.graph_format = "csc"
args.extra_envs = ["NCCL_DEBUG=INFO"]
args.num_omp_threads = 1
udf_command = "python3 train_dist.py --num_epochs 10"
clients_cmd, servers_cmd = submit_jobs(args, udf_command, dry_run=True)
def common_checks():
assert 'cd ' + test_dir in cmd
assert 'export ' + args.extra_envs[0] in cmd
assert f'DGL_NUM_SAMPLER={args.num_samplers}' in cmd
assert f'DGL_NUM_CLIENT={args.num_trainers*(args.num_samplers+1)*num_machines}' in cmd
assert f'DGL_CONF_PATH={args.part_config}' in cmd
assert f'DGL_IP_CONFIG={args.ip_config}' in cmd
assert f'DGL_NUM_SERVER={args.num_servers}' in cmd
assert f'DGL_GRAPH_FORMAT={args.graph_format}' in cmd
assert f'OMP_NUM_THREADS={args.num_omp_threads}' in cmd
assert udf_command[len('python3 '):] in cmd
assert "cd " + test_dir in cmd
assert "export " + args.extra_envs[0] in cmd
assert f"DGL_NUM_SAMPLER={args.num_samplers}" in cmd
assert (
f"DGL_NUM_CLIENT={args.num_trainers*(args.num_samplers+1)*num_machines}"
in cmd
)
assert f"DGL_CONF_PATH={args.part_config}" in cmd
assert f"DGL_IP_CONFIG={args.ip_config}" in cmd
assert f"DGL_NUM_SERVER={args.num_servers}" in cmd
assert f"DGL_GRAPH_FORMAT={args.graph_format}" in cmd
assert f"OMP_NUM_THREADS={args.num_omp_threads}" in cmd
assert udf_command[len("python3 ") :] in cmd
for cmd in clients_cmd:
common_checks()
assert 'DGL_DIST_MODE=distributed' in cmd
assert 'DGL_ROLE=client' in cmd
assert 'DGL_GROUP_ID=0' in cmd
assert f'python3 -m torch.distributed.launch --nproc_per_node={args.num_trainers} --nnodes={num_machines}' in cmd
assert '--master_addr=127.0.0' in cmd
assert '--master_port=1234' in cmd
assert "DGL_DIST_MODE=distributed" in cmd
assert "DGL_ROLE=client" in cmd
assert "DGL_GROUP_ID=0" in cmd
assert (
f"python3 -m torch.distributed.launch --nproc_per_node={args.num_trainers} --nnodes={num_machines}"
in cmd
)
assert "--master_addr=127.0.0" in cmd
assert "--master_port=1234" in cmd
for cmd in servers_cmd:
common_checks()
assert 'DGL_ROLE=server' in cmd
assert 'DGL_KEEP_ALIVE=0' in cmd
assert 'DGL_SERVER_ID=' in cmd
assert "DGL_ROLE=server" in cmd
assert "DGL_KEEP_ALIVE=0" in cmd
assert "DGL_SERVER_ID=" in cmd
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
# See the __main__ block for usage of chunk_graph().
import pathlib
import json
from contextlib import contextmanager
import logging
import os
import pathlib
from contextlib import contextmanager
import torch
from utils import array_readwriter, setdir
import dgl
from utils import setdir
from utils import array_readwriter
def chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt):
paths = []
......@@ -17,31 +17,38 @@ def chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt):
for j, n in enumerate(chunk_sizes):
path = os.path.abspath(path_fmt % j)
arr_chunk = arr[offset:offset + n]
logging.info('Chunking %d-%d' % (offset, offset + n))
arr_chunk = arr[offset : offset + n]
logging.info("Chunking %d-%d" % (offset, offset + n))
array_readwriter.get_array_parser(**fmt_meta).write(path, arr_chunk)
offset += n
paths.append(path)
return paths
def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
# First deal with ndata and edata that are homogeneous (i.e. not a dict-of-dict)
if len(g.ntypes) == 1 and not isinstance(next(iter(ndata_paths.values())), dict):
if len(g.ntypes) == 1 and not isinstance(
next(iter(ndata_paths.values())), dict
):
ndata_paths = {g.ntypes[0]: ndata_paths}
if len(g.etypes) == 1 and not isinstance(next(iter(edata_paths.values())), dict):
if len(g.etypes) == 1 and not isinstance(
next(iter(edata_paths.values())), dict
):
edata_paths = {g.etypes[0]: edata_paths}
# Then convert all edge types to canonical edge types
etypestrs = {etype: ':'.join(etype) for etype in g.canonical_etypes}
edata_paths = {':'.join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()}
etypestrs = {etype: ":".join(etype) for etype in g.canonical_etypes}
edata_paths = {
":".join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()
}
metadata = {}
metadata['graph_name'] = name
metadata['node_type'] = g.ntypes
metadata["graph_name"] = name
metadata["node_type"] = g.ntypes
# Compute the number of nodes per chunk per node type
metadata['num_nodes_per_chunk'] = num_nodes_per_chunk = []
metadata["num_nodes_per_chunk"] = num_nodes_per_chunk = []
for ntype in g.ntypes:
num_nodes = g.num_nodes(ntype)
num_nodes_list = []
......@@ -49,12 +56,14 @@ def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
n = num_nodes // num_chunks + (i < num_nodes % num_chunks)
num_nodes_list.append(n)
num_nodes_per_chunk.append(num_nodes_list)
num_nodes_per_chunk_dict = {k: v for k, v in zip(g.ntypes, num_nodes_per_chunk)}
num_nodes_per_chunk_dict = {
k: v for k, v in zip(g.ntypes, num_nodes_per_chunk)
}
metadata['edge_type'] = [etypestrs[etype] for etype in g.canonical_etypes]
metadata["edge_type"] = [etypestrs[etype] for etype in g.canonical_etypes]
# Compute the number of edges per chunk per edge type
metadata['num_edges_per_chunk'] = num_edges_per_chunk = []
metadata["num_edges_per_chunk"] = num_edges_per_chunk = []
for etype in g.canonical_etypes:
num_edges = g.num_edges(etype)
num_edges_list = []
......@@ -62,67 +71,88 @@ def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
n = num_edges // num_chunks + (i < num_edges % num_chunks)
num_edges_list.append(n)
num_edges_per_chunk.append(num_edges_list)
num_edges_per_chunk_dict = {k: v for k, v in zip(g.canonical_etypes, num_edges_per_chunk)}
num_edges_per_chunk_dict = {
k: v for k, v in zip(g.canonical_etypes, num_edges_per_chunk)
}
# Split edge index
metadata['edges'] = {}
with setdir('edge_index'):
metadata["edges"] = {}
with setdir("edge_index"):
for etype in g.canonical_etypes:
etypestr = etypestrs[etype]
logging.info('Chunking edge index for %s' % etypestr)
logging.info("Chunking edge index for %s" % etypestr)
edges_meta = {}
fmt_meta = {"name": "csv", "delimiter": " "}
edges_meta['format'] = fmt_meta
edges_meta["format"] = fmt_meta
srcdst = torch.stack(g.edges(etype=etype), 1)
edges_meta['data'] = chunk_numpy_array(
srcdst.numpy(), fmt_meta, num_edges_per_chunk_dict[etype],
etypestr + '%d.txt')
metadata['edges'][etypestr] = edges_meta
edges_meta["data"] = chunk_numpy_array(
srcdst.numpy(),
fmt_meta,
num_edges_per_chunk_dict[etype],
etypestr + "%d.txt",
)
metadata["edges"][etypestr] = edges_meta
# Chunk node data
metadata['node_data'] = {}
with setdir('node_data'):
metadata["node_data"] = {}
with setdir("node_data"):
for ntype, ndata_per_type in ndata_paths.items():
ndata_meta = {}
with setdir(ntype):
for key, path in ndata_per_type.items():
logging.info('Chunking node data for type %s key %s' % (ntype, key))
logging.info(
"Chunking node data for type %s key %s" % (ntype, key)
)
ndata_key_meta = {}
reader_fmt_meta = writer_fmt_meta = {"name": "numpy"}
arr = array_readwriter.get_array_parser(**reader_fmt_meta).read(path)
ndata_key_meta['format'] = writer_fmt_meta
ndata_key_meta['data'] = chunk_numpy_array(
arr, writer_fmt_meta, num_nodes_per_chunk_dict[ntype],
key + '-%d.npy')
arr = array_readwriter.get_array_parser(
**reader_fmt_meta
).read(path)
ndata_key_meta["format"] = writer_fmt_meta
ndata_key_meta["data"] = chunk_numpy_array(
arr,
writer_fmt_meta,
num_nodes_per_chunk_dict[ntype],
key + "-%d.npy",
)
ndata_meta[key] = ndata_key_meta
metadata['node_data'][ntype] = ndata_meta
metadata["node_data"][ntype] = ndata_meta
# Chunk edge data
metadata['edge_data'] = {}
with setdir('edge_data'):
metadata["edge_data"] = {}
with setdir("edge_data"):
for etypestr, edata_per_type in edata_paths.items():
edata_meta = {}
with setdir(etypestr):
for key, path in edata_per_type.items():
logging.info('Chunking edge data for type %s key %s' % (etypestr, key))
logging.info(
"Chunking edge data for type %s key %s"
% (etypestr, key)
)
edata_key_meta = {}
reader_fmt_meta = writer_fmt_meta = {"name": "numpy"}
arr = array_readwriter.get_array_parser(**reader_fmt_meta).read(path)
edata_key_meta['format'] = writer_fmt_meta
etype = tuple(etypestr.split(':'))
edata_key_meta['data'] = chunk_numpy_array(
arr, writer_fmt_meta, num_edges_per_chunk_dict[etype],
key + '-%d.npy')
arr = array_readwriter.get_array_parser(
**reader_fmt_meta
).read(path)
edata_key_meta["format"] = writer_fmt_meta
etype = tuple(etypestr.split(":"))
edata_key_meta["data"] = chunk_numpy_array(
arr,
writer_fmt_meta,
num_edges_per_chunk_dict[etype],
key + "-%d.npy",
)
edata_meta[key] = edata_key_meta
metadata['edge_data'][etypestr] = edata_meta
metadata["edge_data"][etypestr] = edata_meta
metadata_path = 'metadata.json'
with open(metadata_path, 'w') as f:
metadata_path = "metadata.json"
with open(metadata_path, "w") as f:
json.dump(metadata, f)
logging.info('Saved metadata in %s' % os.path.abspath(metadata_path))
logging.info("Saved metadata in %s" % os.path.abspath(metadata_path))
def chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
"""
......@@ -157,22 +187,29 @@ def chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
with setdir(output_path):
_chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path)
if __name__ == '__main__':
logging.basicConfig(level='INFO')
input_dir = '/data'
output_dir = '/chunked-data'
(g,), _ = dgl.load_graphs(os.path.join(input_dir, 'graph.dgl'))
if __name__ == "__main__":
logging.basicConfig(level="INFO")
input_dir = "/data"
output_dir = "/chunked-data"
(g,), _ = dgl.load_graphs(os.path.join(input_dir, "graph.dgl"))
chunk_graph(
g,
'mag240m',
{'paper': {
'feat': os.path.join(input_dir, 'paper/feat.npy'),
'label': os.path.join(input_dir, 'paper/label.npy'),
'year': os.path.join(input_dir, 'paper/year.npy')}},
{'cites': {'count': os.path.join(input_dir, 'cites/count.npy')},
'writes': {'year': os.path.join(input_dir, 'writes/year.npy')},
# you can put the same data file if they indeed share the features.
'rev_writes': {'year': os.path.join(input_dir, 'writes/year.npy')}},
4,
output_dir)
g,
"mag240m",
{
"paper": {
"feat": os.path.join(input_dir, "paper/feat.npy"),
"label": os.path.join(input_dir, "paper/label.npy"),
"year": os.path.join(input_dir, "paper/year.npy"),
}
},
{
"cites": {"count": os.path.join(input_dir, "cites/count.npy")},
"writes": {"year": os.path.join(input_dir, "writes/year.npy")},
# you can put the same data file if they indeed share the features.
"rev_writes": {"year": os.path.join(input_dir, "writes/year.npy")},
},
4,
output_dir,
)
# The generated metadata follows the same format as tools/sample-config/mag240m-metadata.json.
"""Copy the partitions to a cluster of machines."""
import argparse
import copy
import json
import logging
import os
import signal
import stat
import sys
import subprocess
import argparse
import signal
import logging
import json
import copy
import sys
def copy_file(file_name, ip, workspace, param=""):
print("copy {} to {}".format(file_name, ip + ":" + workspace + "/"))
cmd = "scp " + param + " " + file_name + " " + ip + ":" + workspace + "/"
subprocess.check_call(cmd, shell=True)
def copy_file(file_name, ip, workspace, param=''):
print('copy {} to {}'.format(file_name, ip + ':' + workspace + '/'))
cmd = 'scp ' + param + ' ' + file_name + ' ' + ip + ':' + workspace + '/'
subprocess.check_call(cmd, shell = True)
def exec_cmd(ip, cmd):
cmd = 'ssh -o StrictHostKeyChecking=no ' + ip + ' \'' + cmd + '\''
subprocess.check_call(cmd, shell = True)
cmd = "ssh -o StrictHostKeyChecking=no " + ip + " '" + cmd + "'"
subprocess.check_call(cmd, shell=True)
def main():
parser = argparse.ArgumentParser(description='Copy data to the servers.')
parser.add_argument('--workspace', type=str, required=True,
help='Path of user directory of distributed tasks. \
parser = argparse.ArgumentParser(description="Copy data to the servers.")
parser.add_argument(
"--workspace",
type=str,
required=True,
help="Path of user directory of distributed tasks. \
This is used to specify a destination location where \
data are copied to on remote machines.')
parser.add_argument('--rel_data_path', type=str, required=True,
help='Relative path in workspace to store the partition data.')
parser.add_argument('--part_config', type=str, required=True,
help='The partition config file. The path is on the local machine.')
parser.add_argument('--script_folder', type=str, required=True,
help='The folder contains all the user code scripts.')
parser.add_argument('--ip_config', type=str, required=True,
help='The file of IP configuration for servers. \
The path is on the local machine.')
data are copied to on remote machines.",
)
parser.add_argument(
"--rel_data_path",
type=str,
required=True,
help="Relative path in workspace to store the partition data.",
)
parser.add_argument(
"--part_config",
type=str,
required=True,
help="The partition config file. The path is on the local machine.",
)
parser.add_argument(
"--script_folder",
type=str,
required=True,
help="The folder contains all the user code scripts.",
)
parser.add_argument(
"--ip_config",
type=str,
required=True,
help="The file of IP configuration for servers. \
The path is on the local machine.",
)
args = parser.parse_args()
hosts = []
with open(args.ip_config) as f:
for line in f:
res = line.strip().split(' ')
res = line.strip().split(" ")
ip = res[0]
hosts.append(ip)
# We need to update the partition config file so that the paths are relative to
# the workspace in the remote machines.
with open(args.part_config) as conf_f:
part_metadata = json.load(conf_f)
tmp_part_metadata = copy.deepcopy(part_metadata)
num_parts = part_metadata['num_parts']
assert num_parts == len(hosts), \
'The number of partitions needs to be the same as the number of hosts.'
graph_name = part_metadata['graph_name']
node_map = part_metadata['node_map']
edge_map = part_metadata['edge_map']
num_parts = part_metadata["num_parts"]
assert num_parts == len(
hosts
), "The number of partitions needs to be the same as the number of hosts."
graph_name = part_metadata["graph_name"]
node_map = part_metadata["node_map"]
edge_map = part_metadata["edge_map"]
if not isinstance(node_map, dict):
assert node_map[-4:] == '.npy', 'node map should be stored in a NumPy array.'
tmp_part_metadata['node_map'] = '{}/{}/node_map.npy'.format(args.workspace,
args.rel_data_path)
assert (
node_map[-4:] == ".npy"
), "node map should be stored in a NumPy array."
tmp_part_metadata["node_map"] = "{}/{}/node_map.npy".format(
args.workspace, args.rel_data_path
)
if not isinstance(edge_map, dict):
assert edge_map[-4:] == '.npy', 'edge map should be stored in a NumPy array.'
tmp_part_metadata['edge_map'] = '{}/{}/edge_map.npy'.format(args.workspace,
args.rel_data_path)
assert (
edge_map[-4:] == ".npy"
), "edge map should be stored in a NumPy array."
tmp_part_metadata["edge_map"] = "{}/{}/edge_map.npy".format(
args.workspace, args.rel_data_path
)
for part_id in range(num_parts):
part_files = tmp_part_metadata['part-{}'.format(part_id)]
part_files['edge_feats'] = '{}/part{}/edge_feat.dgl'.format(args.rel_data_path, part_id)
part_files['node_feats'] = '{}/part{}/node_feat.dgl'.format(args.rel_data_path, part_id)
part_files['part_graph'] = '{}/part{}/graph.dgl'.format(args.rel_data_path, part_id)
tmp_part_config = '/tmp/{}.json'.format(graph_name)
with open(tmp_part_config, 'w') as outfile:
part_files = tmp_part_metadata["part-{}".format(part_id)]
part_files["edge_feats"] = "{}/part{}/edge_feat.dgl".format(
args.rel_data_path, part_id
)
part_files["node_feats"] = "{}/part{}/node_feat.dgl".format(
args.rel_data_path, part_id
)
part_files["part_graph"] = "{}/part{}/graph.dgl".format(
args.rel_data_path, part_id
)
tmp_part_config = "/tmp/{}.json".format(graph_name)
with open(tmp_part_config, "w") as outfile:
json.dump(tmp_part_metadata, outfile, sort_keys=True, indent=4)
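# For illustration, assuming graph_name="mygraph", --rel_data_path=data and two
# partitions, the config dumped above to /tmp/mygraph.json would contain entries
# along these lines, with paths now relative to the remote workspace:
#
#     "part-0": {
#         "edge_feats": "data/part0/edge_feat.dgl",
#         "node_feats": "data/part0/node_feat.dgl",
#         "part_graph": "data/part0/graph.dgl"
#     }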
# Copy ip config.
for part_id, ip in enumerate(hosts):
remote_path = '{}/{}'.format(args.workspace, args.rel_data_path)
exec_cmd(ip, 'mkdir -p {}'.format(remote_path))
remote_path = "{}/{}".format(args.workspace, args.rel_data_path)
exec_cmd(ip, "mkdir -p {}".format(remote_path))
copy_file(args.ip_config, ip, args.workspace)
copy_file(tmp_part_config, ip, '{}/{}'.format(args.workspace, args.rel_data_path))
node_map = part_metadata['node_map']
edge_map = part_metadata['edge_map']
copy_file(
tmp_part_config,
ip,
"{}/{}".format(args.workspace, args.rel_data_path),
)
node_map = part_metadata["node_map"]
edge_map = part_metadata["edge_map"]
if not isinstance(node_map, dict):
copy_file(node_map, ip, tmp_part_metadata['node_map'])
copy_file(node_map, ip, tmp_part_metadata["node_map"])
if not isinstance(edge_map, dict):
copy_file(edge_map, ip, tmp_part_metadata['edge_map'])
remote_path = '{}/{}/part{}'.format(args.workspace, args.rel_data_path, part_id)
exec_cmd(ip, 'mkdir -p {}'.format(remote_path))
part_files = part_metadata['part-{}'.format(part_id)]
copy_file(part_files['node_feats'], ip, remote_path)
copy_file(part_files['edge_feats'], ip, remote_path)
copy_file(part_files['part_graph'], ip, remote_path)
copy_file(edge_map, ip, tmp_part_metadata["edge_map"])
remote_path = "{}/{}/part{}".format(
args.workspace, args.rel_data_path, part_id
)
exec_cmd(ip, "mkdir -p {}".format(remote_path))
part_files = part_metadata["part-{}".format(part_id)]
copy_file(part_files["node_feats"], ip, remote_path)
copy_file(part_files["edge_feats"], ip, remote_path)
copy_file(part_files["part_graph"], ip, remote_path)
# copy script folder
copy_file(args.script_folder, ip, args.workspace, '-r')
copy_file(args.script_folder, ip, args.workspace, "-r")
def signal_handler(signal, frame):
logging.info('Stop copying')
logging.info("Stop copying")
sys.exit(0)
if __name__ == '__main__':
fmt = '%(asctime)s %(levelname)s %(message)s'
if __name__ == "__main__":
fmt = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)
signal.signal(signal.SIGINT, signal_handler)
main()
"""Launching distributed graph partitioning pipeline """
import os
import sys
import argparse
import logging
import json
import logging
import os
import sys
from partition_algo.base import load_partition_meta
INSTALL_DIR = os.path.abspath(os.path.join(__file__, '..'))
INSTALL_DIR = os.path.abspath(os.path.join(__file__, ".."))
LAUNCH_SCRIPT = "distgraphlaunch.py"
PIPELINE_SCRIPT = "distpartitioning/data_proc_pipeline.py"
@@ -23,6 +24,7 @@ LARG_IPCONF = "ip_config"
LARG_MASTER_PORT = "master_port"
LARG_SSH_PORT = "ssh_port"
def get_launch_cmd(args) -> str:
cmd = sys.executable + " " + os.path.join(INSTALL_DIR, LAUNCH_SCRIPT)
cmd = f"{cmd} --{LARG_SSH_PORT} {args.ssh_port} "
@@ -34,7 +36,7 @@ def get_launch_cmd(args) -> str:
def submit_jobs(args) -> str:
#read the json file and get the remaining argument here.
# read the json file and get the remaining argument here.
schema_path = "metadata.json"
with open(os.path.join(args.in_dir, schema_path)) as schema:
schema_map = json.load(schema)
@@ -49,17 +51,22 @@ def submit_jobs(args) -> str:
part_meta = load_partition_meta(partition_path)
num_parts = part_meta.num_parts
if num_parts > num_chunks:
raise Exception('Number of partitions should be less/equal than number of chunks.')
raise Exception(
"Number of partitions should be less/equal than number of chunks."
)
# verify ip_config
with open(args.ip_config, 'r') as f:
with open(args.ip_config, "r") as f:
num_ips = len(f.readlines())
assert num_ips == num_parts, \
f'The number of lines[{args.ip_config}] should be equal to num_parts[{num_parts}].'
assert (
num_ips == num_parts
), f"The number of lines[{args.ip_config}] should be equal to num_parts[{num_parts}]."
argslist = ""
argslist += "--world-size {} ".format(num_parts)
argslist += "--partitions-dir {} ".format(os.path.abspath(args.partitions_dir))
argslist += "--partitions-dir {} ".format(
os.path.abspath(args.partitions_dir)
)
argslist += "--input-dir {} ".format(os.path.abspath(args.in_dir))
argslist += "--graph-name {} ".format(graph_name)
argslist += "--schema {} ".format(schema_path)
@@ -75,28 +82,73 @@ def submit_jobs(args) -> str:
udf_cmd = f"{args.python_path} {pipeline_cmd} {argslist}"
launch_cmd = get_launch_cmd(args)
launch_cmd += '\"'+udf_cmd+'\"'
launch_cmd += '"' + udf_cmd + '"'
print(launch_cmd)
os.system(launch_cmd)
def main():
parser = argparse.ArgumentParser(description='Dispatch edge index and data to partitions', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--in-dir', type=str, help='Location of the input directory where the dataset is located')
parser.add_argument('--partitions-dir', type=str, help='Location of the partition-id mapping files which define node-ids and their respective partition-ids, relative to the input directory')
parser.add_argument('--out-dir', type=str, help='Location of the output directory where the graph partitions will be created by this pipeline')
parser.add_argument('--ip-config', type=str, help='File location of IP configuration for server processes')
parser.add_argument('--master-port', type=int, default=12345, help='port used by gloo group to create randezvous point')
parser.add_argument('--log-level', type=str, default="info", help='To enable log level for debugging purposes. Available options: (Critical, Error, Warning, Info, Debug, Notset)')
parser.add_argument('--python-path', type=str, default=sys.executable, help='Path to the Python executable on all workers')
parser.add_argument('--ssh-port', type=int, default=22, help='SSH Port.')
parser.add_argument('--process-group-timeout', type=int, default=1800,
help='timeout[seconds] for operations executed against the process group')
parser.add_argument('--save-orig-nids', action='store_true',
help='Save original node IDs into files')
parser.add_argument('--save-orig-eids', action='store_true',
help='Save original edge IDs into files')
parser = argparse.ArgumentParser(
description="Dispatch edge index and data to partitions",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"--in-dir",
type=str,
help="Location of the input directory where the dataset is located",
)
parser.add_argument(
"--partitions-dir",
type=str,
help="Location of the partition-id mapping files which define node-ids and their respective partition-ids, relative to the input directory",
)
parser.add_argument(
"--out-dir",
type=str,
help="Location of the output directory where the graph partitions will be created by this pipeline",
)
parser.add_argument(
"--ip-config",
type=str,
help="File location of IP configuration for server processes",
)
parser.add_argument(
"--master-port",
type=int,
default=12345,
help="port used by gloo group to create randezvous point",
)
parser.add_argument(
"--log-level",
type=str,
default="info",
help="To enable log level for debugging purposes. Available options: (Critical, Error, Warning, Info, Debug, Notset)",
)
parser.add_argument(
"--python-path",
type=str,
default=sys.executable,
help="Path to the Python executable on all workers",
)
parser.add_argument("--ssh-port", type=int, default=22, help="SSH Port.")
parser.add_argument(
"--process-group-timeout",
type=int,
default=1800,
help="timeout[seconds] for operations executed against the process group",
)
parser.add_argument(
"--save-orig-nids",
action="store_true",
help="Save original node IDs into files",
)
parser.add_argument(
"--save-orig-eids",
action="store_true",
help="Save original edge IDs into files",
)
args, udf_command = parser.parse_known_args()
@@ -109,7 +161,8 @@ def main():
tokens = sys.executable.split(os.sep)
submit_jobs(args)
if __name__ == '__main__':
fmt = '%(asctime)s %(levelname)s %(message)s'
if __name__ == "__main__":
fmt = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)
main()
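# A sketch of how this pipeline launcher might be invoked (the file name
# dispatch_data.py is assumed here, not taken from this diff; the arguments are the
# ones defined above):
#
#     python3 dispatch_data.py \
#         --in-dir /data/chunked_graph \
#         --partitions-dir partitions \
#         --out-dir /data/partitioned \
#         --ip-config ip_config.txt \
#         --master-port 12345 --ssh-port 22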
"""Launching tool for DGL distributed training"""
import os
import stat
import sys
import subprocess
import argparse
import signal
import logging
import time
import json
import logging
import multiprocessing
import os
import queue
import re
import signal
import stat
import subprocess
import sys
import time
from functools import partial
from threading import Thread
from typing import Optional
def cleanup_proc(get_all_remote_pids, conn):
'''This process tries to clean up the remote training tasks.
'''
print('cleanupu process runs')
"""This process tries to clean up the remote training tasks."""
print("cleanupu process runs")
# This process should not handle SIGINT.
signal.signal(signal.SIGINT, signal.SIG_IGN)
data = conn.recv()
# If the launch process exits normally, this process doesn't need to do anything.
if data == 'exit':
if data == "exit":
sys.exit(0)
else:
remote_pids = get_all_remote_pids()
# Otherwise, we need to ssh to each machine and kill the training jobs.
for (ip, port), pids in remote_pids.items():
kill_process(ip, port, pids)
print('cleanup process exits')
print("cleanup process exits")
def kill_process(ip, port, pids):
'''ssh to a remote machine and kill the specified processes.
'''
"""ssh to a remote machine and kill the specified processes."""
curr_pid = os.getpid()
killed_pids = []
# If we kill child processes first, the parent process may create more again. This happens
@@ -43,8 +43,14 @@ def kill_process(ip, port, pids):
pids.sort()
for pid in pids:
assert curr_pid != pid
print('kill process {} on {}:{}'.format(pid, ip, port), flush=True)
kill_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'kill {}\''.format(pid)
print("kill process {} on {}:{}".format(pid, ip, port), flush=True)
kill_cmd = (
"ssh -o StrictHostKeyChecking=no -p "
+ str(port)
+ " "
+ ip
+ " 'kill {}'".format(pid)
)
subprocess.run(kill_cmd, shell=True)
killed_pids.append(pid)
# It's possible that some of the processes are not killed. Let's try again.
@@ -55,30 +61,45 @@ def kill_process(ip, port, pids):
else:
killed_pids.sort()
for pid in killed_pids:
print('kill process {} on {}:{}'.format(pid, ip, port), flush=True)
kill_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'kill -9 {}\''.format(pid)
print(
"kill process {} on {}:{}".format(pid, ip, port), flush=True
)
kill_cmd = (
"ssh -o StrictHostKeyChecking=no -p "
+ str(port)
+ " "
+ ip
+ " 'kill -9 {}'".format(pid)
)
subprocess.run(kill_cmd, shell=True)
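# For example, with ip="172.31.10.1", port=22 and pid=4242, the command assembled
# above for the second attempt is:
#
#     ssh -o StrictHostKeyChecking=no -p 22 172.31.10.1 'kill -9 4242'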
def get_killed_pids(ip, port, killed_pids):
'''Get the process IDs that we want to kill but are still alive.
'''
"""Get the process IDs that we want to kill but are still alive."""
killed_pids = [str(pid) for pid in killed_pids]
killed_pids = ','.join(killed_pids)
ps_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'ps -p {} -h\''.format(killed_pids)
killed_pids = ",".join(killed_pids)
ps_cmd = (
"ssh -o StrictHostKeyChecking=no -p "
+ str(port)
+ " "
+ ip
+ " 'ps -p {} -h'".format(killed_pids)
)
res = subprocess.run(ps_cmd, shell=True, stdout=subprocess.PIPE)
pids = []
for p in res.stdout.decode('utf-8').split('\n'):
for p in res.stdout.decode("utf-8").split("\n"):
l = p.split()
if len(l) > 0:
pids.append(int(l[0]))
return pids
def execute_remote(
cmd: str,
state_q: queue.Queue,
ip: str,
port: int,
username: Optional[str] = ""
username: Optional[str] = "",
) -> Thread:
"""Execute command line on remote machine via ssh.
@@ -118,22 +139,34 @@ def execute_remote(
except Exception:
state_q.put(-1)
thread = Thread(target=run, args=(ssh_cmd, state_q,))
thread = Thread(
target=run,
args=(
ssh_cmd,
state_q,
),
)
thread.setDaemon(True)
thread.start()
# sleep for a while in case ssh is rejected by the peer due to a busy connection
time.sleep(0.2)
return thread
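# Usage sketch (host and command values assumed): run a command on a remote worker
# and pick up its exit status from state_q once the daemon thread finishes.
#
#     state_q = queue.Queue()
#     thread = execute_remote("cd /workspace; python3 train.py", state_q,
#                             ip="172.31.10.1", port=22)
#     thread.join()
#     status = state_q.get()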
def get_remote_pids(ip, port, cmd_regex):
"""Get the process IDs that run the command in the remote machine.
"""
"""Get the process IDs that run the command in the remote machine."""
pids = []
curr_pid = os.getpid()
# Here we want to get the python processes. We may get some ssh processes, so we should filter them out.
ps_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'ps -aux | grep python | grep -v StrictHostKeyChecking\''
ps_cmd = (
"ssh -o StrictHostKeyChecking=no -p "
+ str(port)
+ " "
+ ip
+ " 'ps -aux | grep python | grep -v StrictHostKeyChecking'"
)
res = subprocess.run(ps_cmd, shell=True, stdout=subprocess.PIPE)
for p in res.stdout.decode('utf-8').split('\n'):
for p in res.stdout.decode("utf-8").split("\n"):
l = p.split()
if len(l) < 2:
continue
@@ -142,28 +175,34 @@ def get_remote_pids(ip, port, cmd_regex):
if res is not None and int(l[1]) != curr_pid:
pids.append(l[1])
pid_str = ','.join([str(pid) for pid in pids])
ps_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'pgrep -P {}\''.format(pid_str)
pid_str = ",".join([str(pid) for pid in pids])
ps_cmd = (
"ssh -o StrictHostKeyChecking=no -p "
+ str(port)
+ " "
+ ip
+ " 'pgrep -P {}'".format(pid_str)
)
res = subprocess.run(ps_cmd, shell=True, stdout=subprocess.PIPE)
pids1 = res.stdout.decode('utf-8').split('\n')
pids1 = res.stdout.decode("utf-8").split("\n")
all_pids = []
for pid in set(pids + pids1):
if pid == '' or int(pid) == curr_pid:
if pid == "" or int(pid) == curr_pid:
continue
all_pids.append(int(pid))
all_pids.sort()
return all_pids
def get_all_remote_pids(hosts, ssh_port, udf_command):
'''Get all remote processes.
'''
"""Get all remote processes."""
remote_pids = {}
for node_id, host in enumerate(hosts):
ip, _ = host
# When creating training processes in remote machines, we may insert some arguments
# in the commands. We need to use regular expressions to match the modified command.
cmds = udf_command.split()
new_udf_command = ' .*'.join(cmds)
new_udf_command = " .*".join(cmds)
pids = get_remote_pids(ip, ssh_port, new_udf_command)
remote_pids[(ip, ssh_port)] = pids
return remote_pids
@@ -174,7 +213,7 @@ def construct_torch_dist_launcher_cmd(
num_nodes: int,
node_rank: int,
master_addr: str,
master_port: int
master_port: int,
) -> str:
"""Constructs the torch distributed launcher command.
Helper function.
@@ -189,18 +228,20 @@ def construct_torch_dist_launcher_cmd(
Returns:
cmd_str.
"""
torch_cmd_template = "-m torch.distributed.launch " \
"--nproc_per_node={nproc_per_node} " \
"--nnodes={nnodes} " \
"--node_rank={node_rank} " \
"--master_addr={master_addr} " \
"--master_port={master_port}"
torch_cmd_template = (
"-m torch.distributed.launch "
"--nproc_per_node={nproc_per_node} "
"--nnodes={nnodes} "
"--node_rank={node_rank} "
"--master_addr={master_addr} "
"--master_port={master_port}"
)
return torch_cmd_template.format(
nproc_per_node=num_trainers,
nnodes=num_nodes,
node_rank=node_rank,
master_addr=master_addr,
master_port=master_port
master_port=master_port,
)
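# For illustration, calling the helper above with num_trainers=4, num_nodes=2,
# node_rank=0, master_addr="172.31.10.1" and master_port=1234 yields:
#
#     -m torch.distributed.launch --nproc_per_node=4 --nnodes=2 --node_rank=0 \
#         --master_addr=172.31.10.1 --master_port=1234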
@@ -243,7 +284,7 @@ def wrap_udf_in_torch_dist_launcher(
num_nodes=num_nodes,
node_rank=node_rank,
master_addr=master_addr,
master_port=master_port
master_port=master_port,
)
# Auto-detect the python binary that kicks off the distributed trainer code.
# Note: This allowlist order matters, this will match with the FIRST matching entry. Thus, please add names to this
@@ -251,9 +292,14 @@ def wrap_udf_in_torch_dist_launcher(
# (python3.7, python3.8) -> (python3)
# The allowed python versions are from this: https://www.dgl.ai/pages/start.html
python_bin_allowlist = (
"python3.6", "python3.7", "python3.8", "python3.9", "python3",
"python3.6",
"python3.7",
"python3.8",
"python3.9",
"python3",
# for backwards compatibility, accept python2 but technically DGL is a py3 library, so this is not recommended
"python2.7", "python2",
"python2.7",
"python2",
)
# If none of the candidate python bins match, then we go with the default `python`
python_bin = "python"
@@ -268,7 +314,9 @@ def wrap_udf_in_torch_dist_launcher(
# python -m torch.distributed.launch [DIST TORCH ARGS] path/to/dist_trainer.py arg0 arg1
# Note: if there are multiple python commands in `udf_command`, this may do the Wrong Thing, eg launch each
# python command within the torch distributed launcher.
new_udf_command = udf_command.replace(python_bin, f"{python_bin} {torch_dist_cmd}")
new_udf_command = udf_command.replace(
python_bin, f"{python_bin} {torch_dist_cmd}"
)
return new_udf_command
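# Sketch of the substitution above, assuming the user command
# "python3 train_dist.py --graph-name mygraph" (train_dist.py is a hypothetical
# training script): only the matched python binary token is replaced, giving
#
#     python3 -m torch.distributed.launch [DIST TORCH ARGS] train_dist.py --graph-name mygraph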
@@ -425,6 +473,7 @@ def wrap_cmd_with_local_envvars(cmd: str, env_vars: str) -> str:
# https://stackoverflow.com/a/45993803
return f"(export {env_vars}; {cmd})"
def wrap_cmd_with_extra_envvars(cmd: str, env_vars: list) -> str:
"""Wraps a CLI command with extra env vars
@@ -448,6 +497,7 @@ def wrap_cmd_with_extra_envvars(cmd: str, env_vars: list) -> str:
g_monitor_file = None
g_group_id = 0
def has_alive_servers(args):
"""Check whether there exists alive servers.
@@ -467,23 +517,32 @@ def has_alive_servers(args):
return False
global g_monitor_file
global g_group_id
monitor_file = '/tmp/dgl_dist_monitor_' + args.server_name
monitor_file = "/tmp/dgl_dist_monitor_" + args.server_name
from filelock import FileLock
lock = FileLock(monitor_file + '.lock')
lock = FileLock(monitor_file + ".lock")
with lock:
next_group_id = None
ret = os.path.exists(monitor_file)
if ret:
print("Monitor file for alive servers already exist: {}.".format(monitor_file))
lines = [line.rstrip('\n') for line in open(monitor_file)]
print(
"Monitor file for alive servers already exist: {}.".format(
monitor_file
)
)
lines = [line.rstrip("\n") for line in open(monitor_file)]
g_group_id = int(lines[0])
next_group_id = g_group_id + 1
if not ret and args.keep_alive:
next_group_id = 1
print("Monitor file for alive servers is created: {}.".format(monitor_file))
print(
"Monitor file for alive servers is created: {}.".format(
monitor_file
)
)
g_monitor_file = monitor_file
if next_group_id is not None:
with open(monitor_file, 'w') as f:
with open(monitor_file, "w") as f:
f.write(str(next_group_id))
return ret
@@ -494,14 +553,24 @@ def clean_alive_servers():
try:
if g_monitor_file is not None:
os.remove(g_monitor_file)
os.remove(g_monitor_file + '.lock')
print("Monitor file for alive servers is removed: {}.".format(g_monitor_file))
os.remove(g_monitor_file + ".lock")
print(
"Monitor file for alive servers is removed: {}.".format(
g_monitor_file
)
)
except:
print("Failed to delete monitor file for alive servers: {}.".format(g_monitor_file))
print(
"Failed to delete monitor file for alive servers: {}.".format(
g_monitor_file
)
)
def get_available_port(ip):
"""Get available port with specified ip."""
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
for port in range(1234, 65535):
try:
@@ -510,10 +579,13 @@ def get_available_port(ip):
return port
raise RuntimeError("Failed to get available port for ip~{}".format(ip))
def submit_jobs(args, udf_command, dry_run=False):
"""Submit distributed jobs (server and client processes) via ssh"""
if dry_run:
print("Currently it's in dry run mode which means no jobs will be launched.")
print(
"Currently it's in dry run mode which means no jobs will be launched."
)
servers_cmd = []
clients_cmd = []
hosts = []
@@ -540,10 +612,11 @@ def submit_jobs(args, udf_command, dry_run=False):
part_config = os.path.join(args.workspace, args.part_config)
with open(part_config) as conf_f:
part_metadata = json.load(conf_f)
assert 'num_parts' in part_metadata, 'num_parts does not exist.'
assert "num_parts" in part_metadata, "num_parts does not exist."
# The number of partitions must match the number of machines in the cluster.
assert part_metadata['num_parts'] == len(hosts), \
'The number of graph partitions has to match the number of machines in the cluster.'
assert part_metadata["num_parts"] == len(
hosts
), "The number of graph partitions has to match the number of machines in the cluster."
state_q = queue.Queue()
tot_num_clients = args.num_trainers * (1 + args.num_samplers) * len(hosts)
@@ -564,11 +637,23 @@ def submit_jobs(args, udf_command, dry_run=False):
ip, _ = hosts[int(i / server_count_per_machine)]
server_env_vars_cur = f"{server_env_vars} DGL_SERVER_ID={i}"
cmd = wrap_cmd_with_local_envvars(udf_command, server_env_vars_cur)
cmd = wrap_cmd_with_extra_envvars(cmd, args.extra_envs) if len(args.extra_envs) > 0 else cmd
cmd = 'cd ' + str(args.workspace) + '; ' + cmd
cmd = (
wrap_cmd_with_extra_envvars(cmd, args.extra_envs)
if len(args.extra_envs) > 0
else cmd
)
cmd = "cd " + str(args.workspace) + "; " + cmd
servers_cmd.append(cmd)
if not dry_run:
thread_list.append(execute_remote(cmd, state_q, ip, args.ssh_port, username=args.ssh_username))
thread_list.append(
execute_remote(
cmd,
state_q,
ip,
args.ssh_port,
username=args.ssh_username,
)
)
else:
print(f"Use running server {args.server_name}.")
@@ -580,7 +665,9 @@ def submit_jobs(args, udf_command, dry_run=False):
ip_config=args.ip_config,
num_servers=args.num_servers,
graph_format=args.graph_format,
num_omp_threads=os.environ.get("OMP_NUM_THREADS", str(args.num_omp_threads)),
num_omp_threads=os.environ.get(
"OMP_NUM_THREADS", str(args.num_omp_threads)
),
group_id=g_group_id,
pythonpath=os.environ.get("PYTHONPATH", ""),
)
@@ -596,31 +683,42 @@ def submit_jobs(args, udf_command, dry_run=False):
num_nodes=len(hosts),
node_rank=node_id,
master_addr=master_addr,
master_port=master_port
master_port=master_port,
)
cmd = wrap_cmd_with_local_envvars(
torch_dist_udf_command, client_env_vars
)
cmd = wrap_cmd_with_local_envvars(torch_dist_udf_command, client_env_vars)
cmd = wrap_cmd_with_extra_envvars(cmd, args.extra_envs) if len(args.extra_envs) > 0 else cmd
cmd = 'cd ' + str(args.workspace) + '; ' + cmd
cmd = (
wrap_cmd_with_extra_envvars(cmd, args.extra_envs)
if len(args.extra_envs) > 0
else cmd
)
cmd = "cd " + str(args.workspace) + "; " + cmd
clients_cmd.append(cmd)
if not dry_run:
thread_list.append(execute_remote(cmd, state_q, ip, args.ssh_port, username=args.ssh_username))
thread_list.append(
execute_remote(
cmd, state_q, ip, args.ssh_port, username=args.ssh_username
)
)
# return commands of clients/servers directly if in dry run mode
if dry_run:
return clients_cmd, servers_cmd
# Start a cleanup process dedicated for cleaning up remote training jobs.
conn1,conn2 = multiprocessing.Pipe()
conn1, conn2 = multiprocessing.Pipe()
func = partial(get_all_remote_pids, hosts, args.ssh_port, udf_command)
process = multiprocessing.Process(target=cleanup_proc, args=(func, conn1))
process.start()
def signal_handler(signal, frame):
logging.info('Stop launcher')
logging.info("Stop launcher")
# We need to tell the cleanup process to kill remote training jobs.
conn2.send('cleanup')
conn2.send("cleanup")
clean_alive_servers()
sys.exit(0)
signal.signal(signal.SIGINT, signal_handler)
err = 0
@@ -633,81 +731,144 @@ def main():
err = err_code
# The training processes complete. We should tell the cleanup process to exit.
conn2.send('exit')
conn2.send("exit")
process.join()
if err != 0:
print("Task failed")
sys.exit(-1)
def main():
parser = argparse.ArgumentParser(description='Launch a distributed job')
parser.add_argument('--ssh_port', type=int, default=22, help='SSH Port.')
parser = argparse.ArgumentParser(description="Launch a distributed job")
parser.add_argument("--ssh_port", type=int, default=22, help="SSH Port.")
parser.add_argument(
"--ssh_username", default="",
"--ssh_username",
default="",
help="Optional. When issuing commands (via ssh) to cluster, use the provided username in the ssh cmd. "
"Example: If you provide --ssh_username=bob, then the ssh command will be like: 'ssh bob@1.2.3.4 CMD' "
"instead of 'ssh 1.2.3.4 CMD'"
"Example: If you provide --ssh_username=bob, then the ssh command will be like: 'ssh bob@1.2.3.4 CMD' "
"instead of 'ssh 1.2.3.4 CMD'",
)
parser.add_argument('--workspace', type=str,
help='Path of user directory of distributed tasks. \
parser.add_argument(
"--workspace",
type=str,
help="Path of user directory of distributed tasks. \
This is used to specify a destination location where \
the contents of current directory will be rsyncd')
parser.add_argument('--num_trainers', type=int,
help='The number of trainer processes per machine')
parser.add_argument('--num_omp_threads', type=int,
help='The number of OMP threads per trainer')
parser.add_argument('--num_samplers', type=int, default=0,
help='The number of sampler processes per trainer process')
parser.add_argument('--num_servers', type=int,
help='The number of server processes per machine')
parser.add_argument('--part_config', type=str,
help='The file (in workspace) of the partition config')
parser.add_argument('--ip_config', type=str,
help='The file (in workspace) of IP configuration for server processes')
parser.add_argument('--num_server_threads', type=int, default=1,
help='The number of OMP threads in the server process. \
the contents of current directory will be rsyncd",
)
parser.add_argument(
"--num_trainers",
type=int,
help="The number of trainer processes per machine",
)
parser.add_argument(
"--num_omp_threads",
type=int,
help="The number of OMP threads per trainer",
)
parser.add_argument(
"--num_samplers",
type=int,
default=0,
help="The number of sampler processes per trainer process",
)
parser.add_argument(
"--num_servers",
type=int,
help="The number of server processes per machine",
)
parser.add_argument(
"--part_config",
type=str,
help="The file (in workspace) of the partition config",
)
parser.add_argument(
"--ip_config",
type=str,
help="The file (in workspace) of IP configuration for server processes",
)
parser.add_argument(
"--num_server_threads",
type=int,
default=1,
help="The number of OMP threads in the server process. \
It should be small if server processes and trainer processes run on \
the same machine. By default, it is 1.')
parser.add_argument('--graph_format', type=str, default='csc',
help='The format of the graph structure of each partition. \
the same machine. By default, it is 1.",
)
parser.add_argument(
"--graph_format",
type=str,
default="csc",
help='The format of the graph structure of each partition. \
The allowed formats are csr, csc and coo. A user can specify multiple \
formats, separated by ",". For example, the graph format is "csr,csc".')
parser.add_argument('--extra_envs', nargs='+', type=str, default=[],
help='Extra environment parameters need to be set. For example, \
formats, separated by ",". For example, the graph format is "csr,csc".',
)
parser.add_argument(
"--extra_envs",
nargs="+",
type=str,
default=[],
help="Extra environment parameters need to be set. For example, \
you can set the LD_LIBRARY_PATH and NCCL_DEBUG by adding: \
--extra_envs LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH NCCL_DEBUG=INFO ')
parser.add_argument('--keep_alive', action='store_true', help='Servers keep alive when clients exit')
parser.add_argument('--server_name', type=str,
help='Used to check whether there exist alive servers')
--extra_envs LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH NCCL_DEBUG=INFO ",
)
parser.add_argument(
"--keep_alive",
action="store_true",
help="Servers keep alive when clients exit",
)
parser.add_argument(
"--server_name",
type=str,
help="Used to check whether there exist alive servers",
)
args, udf_command = parser.parse_known_args()
if args.keep_alive:
assert args.server_name is not None, "Server name is required if '--keep_alive' is enabled."
assert (
args.server_name is not None
), "Server name is required if '--keep_alive' is enabled."
print("Servers will keep alive even clients exit...")
assert len(udf_command) == 1, 'Please provide user command line.'
assert args.num_trainers is not None and args.num_trainers > 0, \
'--num_trainers must be a positive number.'
assert args.num_samplers is not None and args.num_samplers >= 0, \
'--num_samplers must be a non-negative number.'
assert args.num_servers is not None and args.num_servers > 0, \
'--num_servers must be a positive number.'
assert args.num_server_threads > 0, '--num_server_threads must be a positive number.'
assert args.workspace is not None, 'A user has to specify a workspace with --workspace.'
assert args.part_config is not None, \
'A user has to specify a partition configuration file with --part_config.'
assert args.ip_config is not None, \
'A user has to specify an IP configuration file with --ip_config.'
assert len(udf_command) == 1, "Please provide user command line."
assert (
args.num_trainers is not None and args.num_trainers > 0
), "--num_trainers must be a positive number."
assert (
args.num_samplers is not None and args.num_samplers >= 0
), "--num_samplers must be a non-negative number."
assert (
args.num_servers is not None and args.num_servers > 0
), "--num_servers must be a positive number."
assert (
args.num_server_threads > 0
), "--num_server_threads must be a positive number."
assert (
args.workspace is not None
), "A user has to specify a workspace with --workspace."
assert (
args.part_config is not None
), "A user has to specify a partition configuration file with --part_config."
assert (
args.ip_config is not None
), "A user has to specify an IP configuration file with --ip_config."
if args.num_omp_threads is None:
# Here we assume all machines have the same number of CPU cores as the machine
# where the launch script runs.
args.num_omp_threads = max(multiprocessing.cpu_count() // 2 // args.num_trainers, 1)
print('The number of OMP threads per trainer is set to', args.num_omp_threads)
args.num_omp_threads = max(
multiprocessing.cpu_count() // 2 // args.num_trainers, 1
)
print(
"The number of OMP threads per trainer is set to",
args.num_omp_threads,
)
udf_command = str(udf_command[0])
if 'python' not in udf_command:
raise RuntimeError("DGL launching script can only support Python executable file.")
if "python" not in udf_command:
raise RuntimeError(
"DGL launching script can only support Python executable file."
)
submit_jobs(args, udf_command)
if __name__ == '__main__':
fmt = '%(asctime)s %(levelname)s %(message)s'
if __name__ == "__main__":
fmt = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)
main()
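# A sketch of how this launcher might be invoked (the file name launch.py is assumed
# here, not taken from this diff; the arguments are the ones defined above, and the
# quoted trailing string is the user's training command):
#
#     python3 launch.py \
#         --workspace /home/ubuntu/workspace \
#         --num_trainers 4 --num_samplers 1 --num_servers 1 \
#         --part_config data/mygraph.json \
#         --ip_config ip_config.txt \
#         "python3 train_dist.py --graph-name mygraph"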
# Requires setting PYTHONPATH=${GITROOT}/tools
import argparse
import json
import logging
import sys
import os
import numpy as np
import argparse
import sys
from utils import setdir
from utils import array_readwriter
import numpy as np
from base import PartitionMeta, dump_partition_meta
from utils import array_readwriter, setdir
def _random_partition(metadata, num_parts):
num_nodes_per_type = [sum(_) for _ in metadata['num_nodes_per_chunk']]
ntypes = metadata['node_type']
num_nodes_per_type = [sum(_) for _ in metadata["num_nodes_per_chunk"]]
ntypes = metadata["node_type"]
for ntype, n in zip(ntypes, num_nodes_per_type):
logging.info('Generating partition for node type %s' % ntype)
logging.info("Generating partition for node type %s" % ntype)
parts = np.random.randint(0, num_parts, (n,))
array_readwriter.get_array_parser(name='csv').write(ntype + '.txt', parts)
array_readwriter.get_array_parser(name="csv").write(
ntype + ".txt", parts
)
def random_partition(metadata, num_parts, output_path):
"""
@@ -31,22 +34,28 @@ def random_partition(metadata, num_parts, output_path):
"""
with setdir(output_path):
_random_partition(metadata, num_parts)
part_meta = PartitionMeta(version='1.0.0', num_parts=num_parts, algo_name='random')
dump_partition_meta(part_meta, 'partition_meta.json')
part_meta = PartitionMeta(
version="1.0.0", num_parts=num_parts, algo_name="random"
)
dump_partition_meta(part_meta, "partition_meta.json")
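# The partition_meta.json written above would carry roughly the following, assuming
# num_parts=4 (the exact serialization is handled by dump_partition_meta):
#
#     {"version": "1.0.0", "num_parts": 4, "algo_name": "random"}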
# Run with PYTHONPATH=${GIT_ROOT_DIR}/tools
# where ${GIT_ROOT_DIR} is the directory to the DGL git repository.
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
'--in_dir', type=str, help='input directory that contains the metadata file')
parser.add_argument(
'--out_dir', type=str, help='output directory')
"--in_dir",
type=str,
help="input directory that contains the metadata file",
)
parser.add_argument("--out_dir", type=str, help="output directory")
parser.add_argument(
'--num_partitions', type=int, help='number of partitions')
logging.basicConfig(level='INFO')
"--num_partitions", type=int, help="number of partitions"
)
logging.basicConfig(level="INFO")
args = parser.parse_args()
with open(os.path.join(args.in_dir, 'metadata.json')) as f:
with open(os.path.join(args.in_dir, "metadata.json")) as f:
metadata = json.load(f)
num_parts = args.num_partitions
random_partition(metadata, num_parts, args.out_dir)
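# Example invocation (script path assumed; run with PYTHONPATH set as noted above):
#
#     PYTHONPATH=${GIT_ROOT_DIR}/tools python3 partition_algo/random_partition.py \
#         --in_dir /data/chunked_graph --out_dir partitions --num_partitions 4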
from .files import *
from . import array_readwriter
from .files import *
from .registry import register_array_parser, get_array_parser
from . import csv
from . import numpy_array
from . import csv, numpy_array
from .registry import get_array_parser, register_array_parser
import logging
import pandas as pd
import pyarrow
import pyarrow.csv
from .registry import register_array_parser
@register_array_parser("csv")
class CSVArrayParser(object):
def __init__(self, delimiter=','):
def __init__(self, delimiter=","):
self.delimiter = delimiter
def read(self, path):
logging.info('Reading from %s using CSV format with configuration %s' % (
path, self.__dict__))
logging.info(
"Reading from %s using CSV format with configuration %s"
% (path, self.__dict__)
)
# do not read the first line as header
read_options = pyarrow.csv.ReadOptions(autogenerate_column_names=True)
parse_options = pyarrow.csv.ParseOptions(delimiter=self.delimiter)
arr = pyarrow.csv.read_csv(path, read_options=read_options, parse_options=parse_options)
logging.info('Done reading from %s' % path)
arr = pyarrow.csv.read_csv(
path, read_options=read_options, parse_options=parse_options
)
logging.info("Done reading from %s" % path)
return arr.to_pandas().to_numpy()
def write(self, path, arr):
logging.info('Writing to %s using CSV format with configuration %s' % (
path, self.__dict__))
write_options = pyarrow.csv.WriteOptions(include_header=False, delimiter=self.delimiter)
logging.info(
"Writing to %s using CSV format with configuration %s"
% (path, self.__dict__)
)
write_options = pyarrow.csv.WriteOptions(
include_header=False, delimiter=self.delimiter
)
arr = pyarrow.Table.from_pandas(pd.DataFrame(arr))
pyarrow.csv.write_csv(arr, path, write_options=write_options)
logging.info('Done writing to %s' % path)
logging.info("Done writing to %s" % path)