Unverified Commit ea48ce7a authored by Hongzhi (Steve), Chen, committed by GitHub

[Misc] Black auto fix. (#4697)


Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent bd3fe59e
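For reference, the hunks below are the stock output of the Black formatter named in the commit title. The following is a minimal sketch of reproducing one of them with Black's Python API; the line length of 80 is an assumption inferred from how the reformatted lines wrap and is not stated anywhere in this diff.

import black

# One of the pre-commit lines from the first hunk below (undefined names are
# fine here: Black only parses the source, it does not execute it).
SRC = "domain = '{uri.scheme}://{uri.netloc}/'.format(uri=urlparse(job_link))\n"

# format_str() applies the same rewrites seen in this diff: single quotes
# become double quotes and over-long calls are exploded across lines.
print(black.format_str(SRC, mode=black.Mode(line_length=80)))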
from urllib.parse import urlparse, urljoin
import os
import requests
import pytest
import json
import enum
from pathlib import Path
import json
import os
import tempfile
from pathlib import Path
from urllib.parse import urljoin, urlparse
import pytest
import requests
class JobStatus(enum.Enum):
@@ -27,8 +28,8 @@ JENKINS_STATUS_MAPPING = {
assert "BUILD_URL" in os.environ, "Are you in the Jenkins environment?"
job_link = os.environ["BUILD_URL"]
response = requests.get('{}wfapi'.format(job_link), verify=False).json()
domain = '{uri.scheme}://{uri.netloc}/'.format(uri=urlparse(job_link))
response = requests.get("{}wfapi".format(job_link), verify=False).json()
domain = "{uri.scheme}://{uri.netloc}/".format(uri=urlparse(job_link))
stages = response["stages"]
final_dict = {}
@@ -41,37 +42,38 @@ def get_jenkins_json(path):
for stage in stages:
link = stage['_links']['self']['href']
stage_name = stage['name']
link = stage["_links"]["self"]["href"]
stage_name = stage["name"]
res = requests.get(urljoin(domain, link), verify=False).json()
nodes = res['stageFlowNodes']
nodes = res["stageFlowNodes"]
for node in nodes:
nodes_dict[node['id']] = node
nodes_dict[node['id']]['stageName'] = stage_name
nodes_dict[node["id"]] = node
nodes_dict[node["id"]]["stageName"] = stage_name
def get_node_full_name(node, node_dict):
name = ""
while "parentNodes" in node:
name = name + "/" + node["name"]
id = node['parentNodes'][0]
id = node["parentNodes"][0]
if id in nodes_dict:
node = node_dict[id]
else:
break
return name
for key, node in nodes_dict.items():
logs = get_jenkins_json(
node['_links']['log']['href']).get('text', '')
node_name = node['name']
if "Post Actions" in node['stageName']:
logs = get_jenkins_json(node["_links"]["log"]["href"]).get("text", "")
node_name = node["name"]
if "Post Actions" in node["stageName"]:
continue
node_status = node['status']
id = node['id']
node_status = node["status"]
id = node["id"]
full_name = get_node_full_name(node, nodes_dict)
final_dict["{}_{}/{}".format(id, node['stageName'], full_name)] = {
final_dict["{}_{}/{}".format(id, node["stageName"], full_name)] = {
"status": JENKINS_STATUS_MAPPING[node_status],
"logs": logs
"logs": logs,
}
JOB_NAME = os.getenv("JOB_NAME")
@@ -85,15 +87,18 @@ prefix = f"https://dgl-ci-result.s3.us-west-2.amazonaws.com/{JOB_NAME}/{BUILD_NU
def test_generate_report(test_name):
os.makedirs("./logs_dir/", exist_ok=True)
tmp = tempfile.NamedTemporaryFile(
mode='w', delete=False, suffix=".log", dir="./logs_dir/")
mode="w", delete=False, suffix=".log", dir="./logs_dir/"
)
tmp.write(final_dict[test_name]["logs"])
filename = Path(tmp.name).name
# print(final_dict[test_name]["logs"])
print("Log path: {}".format(prefix+filename))
print("Log path: {}".format(prefix + filename))
if final_dict[test_name]["status"] == JobStatus.FAIL:
pytest.fail(
"Test failed. Please see the log at {}".format(prefix+filename))
"Test failed. Please see the log at {}".format(prefix + filename)
)
elif final_dict[test_name]["status"] == JobStatus.SKIP:
pytest.skip(
"Test skipped. Please see the log at {}".format(prefix+filename))
"Test skipped. Please see the log at {}".format(prefix + filename)
)
import os
import requests
JOB_NAME = os.getenv("JOB_NAME")
BUILD_NUMBER = os.getenv("BUILD_NUMBER")
BUILD_ID = os.getenv("BUILD_ID")
COMMIT = os.getenv("GIT_COMMIT")
job_link = os.environ["BUILD_URL"]
response = requests.get('{}wfapi'.format(job_link), verify=False).json()
response = requests.get("{}wfapi".format(job_link), verify=False).json()
status = "✅ CI test succeeded"
for v in response['stages']:
if v['status'] in ['FAILED', 'ABORTED']:
status = "❌ CI test failed in Stage [{}].".format(v['name'])
for v in response["stages"]:
if v["status"] in ["FAILED", "ABORTED"]:
status = "❌ CI test failed in Stage [{}].".format(v["name"])
break
comment = f""" Commit ID: {COMMIT}\n
......
def test():
pass
if __name__ == "__main__":
test()
\ No newline at end of file
test()
import pytest
import backend as F
import pytest
parametrize_idtype = pytest.mark.parametrize("idtype", [F.int32, F.int64])
......
import dgl
import backend as F
__all__ = ['check_graph_equal']
import dgl
__all__ = ["check_graph_equal"]
def check_graph_equal(g1, g2, *,
check_idtype=True,
check_feature=True):
def check_graph_equal(g1, g2, *, check_idtype=True, check_feature=True):
assert g1.device == g1.device
if check_idtype:
assert g1.idtype == g2.idtype
@@ -26,8 +26,8 @@ def check_graph_equal(g1, g2, *,
for ety in g1.canonical_etypes:
assert g1.number_of_edges(ety) == g2.number_of_edges(ety)
assert F.allclose(g1.batch_num_edges(ety), g2.batch_num_edges(ety))
src1, dst1, eid1 = g1.edges(etype=ety, form='all')
src2, dst2, eid2 = g2.edges(etype=ety, form='all')
src1, dst1, eid1 = g1.edges(etype=ety, form="all")
src2, dst2, eid2 = g2.edges(etype=ety, form="all")
if check_idtype:
assert F.allclose(src1, src2)
assert F.allclose(dst1, dst2)
@@ -42,9 +42,13 @@ def check_graph_equal(g1, g2, *,
if g1.number_of_nodes(nty) == 0:
continue
for feat_name in g1.nodes[nty].data.keys():
assert F.allclose(g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name])
assert F.allclose(
g1.nodes[nty].data[feat_name], g2.nodes[nty].data[feat_name]
)
for ety in g1.canonical_etypes:
if g1.number_of_edges(ety) == 0:
continue
for feat_name in g2.edges[ety].data.keys():
assert F.allclose(g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name])
assert F.allclose(
g1.edges[ety].data[feat_name], g2.edges[ety].data[feat_name]
)
@@ -3,14 +3,15 @@ import os
import tempfile
import unittest
import dgl
import numpy as np
import pytest
import torch
from chunk_graph import chunk_graph
from create_chunked_dataset import create_chunked_dataset
import dgl
from dgl.data.utils import load_graphs, load_tensors
from create_chunked_dataset import create_chunked_dataset
@pytest.mark.parametrize("num_chunks", [1, 8])
def test_chunk_graph(num_chunks):
@@ -19,43 +20,43 @@ def test_chunk_graph(num_chunks):
g = create_chunked_dataset(root_dir, num_chunks, include_edge_data=True)
num_cite_edges = g.number_of_edges('cites')
num_write_edges = g.number_of_edges('writes')
num_affiliate_edges = g.number_of_edges('affiliated_with')
num_cite_edges = g.number_of_edges("cites")
num_write_edges = g.number_of_edges("writes")
num_affiliate_edges = g.number_of_edges("affiliated_with")
num_institutions = g.number_of_nodes('institution')
num_authors = g.number_of_nodes('author')
num_papers = g.number_of_nodes('paper')
num_institutions = g.number_of_nodes("institution")
num_authors = g.number_of_nodes("author")
num_papers = g.number_of_nodes("paper")
# check metadata.json
output_dir = os.path.join(root_dir, 'chunked-data')
json_file = os.path.join(output_dir, 'metadata.json')
output_dir = os.path.join(root_dir, "chunked-data")
json_file = os.path.join(output_dir, "metadata.json")
assert os.path.isfile(json_file)
with open(json_file, 'rb') as f:
with open(json_file, "rb") as f:
meta_data = json.load(f)
assert meta_data['graph_name'] == 'mag240m'
assert len(meta_data['num_nodes_per_chunk'][0]) == num_chunks
assert meta_data["graph_name"] == "mag240m"
assert len(meta_data["num_nodes_per_chunk"][0]) == num_chunks
# check edge_index
output_edge_index_dir = os.path.join(output_dir, 'edge_index')
output_edge_index_dir = os.path.join(output_dir, "edge_index")
for utype, etype, vtype in g.canonical_etypes:
fname = ':'.join([utype, etype, vtype])
fname = ":".join([utype, etype, vtype])
for i in range(num_chunks):
chunk_f_name = os.path.join(
output_edge_index_dir, fname + str(i) + '.txt'
output_edge_index_dir, fname + str(i) + ".txt"
)
assert os.path.isfile(chunk_f_name)
with open(chunk_f_name, 'r') as f:
with open(chunk_f_name, "r") as f:
header = f.readline()
num1, num2 = header.rstrip().split(' ')
num1, num2 = header.rstrip().split(" ")
assert isinstance(int(num1), int)
assert isinstance(int(num2), int)
# check node_data
output_node_data_dir = os.path.join(output_dir, 'node_data', 'paper')
for feat in ['feat', 'label', 'year']:
output_node_data_dir = os.path.join(output_dir, "node_data", "paper")
for feat in ["feat", "label", "year"]:
for i in range(num_chunks):
chunk_f_name = '{}-{}.npy'.format(feat, i)
chunk_f_name = "{}-{}.npy".format(feat, i)
chunk_f_name = os.path.join(output_node_data_dir, chunk_f_name)
assert os.path.isfile(chunk_f_name)
feat_array = np.load(chunk_f_name)
@@ -63,19 +64,19 @@ def test_chunk_graph(num_chunks):
# check edge_data
num_edges = {
'paper:cites:paper': num_cite_edges,
'author:writes:paper': num_write_edges,
'paper:rev_writes:author': num_write_edges,
"paper:cites:paper": num_cite_edges,
"author:writes:paper": num_write_edges,
"paper:rev_writes:author": num_write_edges,
}
output_edge_data_dir = os.path.join(output_dir, 'edge_data')
output_edge_data_dir = os.path.join(output_dir, "edge_data")
for etype, feat in [
['paper:cites:paper', 'count'],
['author:writes:paper', 'year'],
['paper:rev_writes:author', 'year'],
["paper:cites:paper", "count"],
["author:writes:paper", "year"],
["paper:rev_writes:author", "year"],
]:
output_edge_sub_dir = os.path.join(output_edge_data_dir, etype)
for i in range(num_chunks):
chunk_f_name = '{}-{}.npy'.format(feat, i)
chunk_f_name = "{}-{}.npy".format(feat, i)
chunk_f_name = os.path.join(output_edge_sub_dir, chunk_f_name)
assert os.path.isfile(chunk_f_name)
feat_array = np.load(chunk_f_name)
@@ -100,63 +101,63 @@ def test_part_pipeline(num_chunks, num_parts):
all_ntypes = g.ntypes
all_etypes = g.etypes
num_cite_edges = g.number_of_edges('cites')
num_write_edges = g.number_of_edges('writes')
num_affiliate_edges = g.number_of_edges('affiliated_with')
num_cite_edges = g.number_of_edges("cites")
num_write_edges = g.number_of_edges("writes")
num_affiliate_edges = g.number_of_edges("affiliated_with")
num_institutions = g.number_of_nodes('institution')
num_authors = g.number_of_nodes('author')
num_papers = g.number_of_nodes('paper')
num_institutions = g.number_of_nodes("institution")
num_authors = g.number_of_nodes("author")
num_papers = g.number_of_nodes("paper")
# Step1: graph partition
in_dir = os.path.join(root_dir, 'chunked-data')
output_dir = os.path.join(root_dir, 'parted_data')
in_dir = os.path.join(root_dir, "chunked-data")
output_dir = os.path.join(root_dir, "parted_data")
os.system(
'python3 tools/partition_algo/random_partition.py '
'--in_dir {} --out_dir {} --num_partitions {}'.format(
"python3 tools/partition_algo/random_partition.py "
"--in_dir {} --out_dir {} --num_partitions {}".format(
in_dir, output_dir, num_parts
)
)
for ntype in ['author', 'institution', 'paper']:
fname = os.path.join(output_dir, '{}.txt'.format(ntype))
with open(fname, 'r') as f:
for ntype in ["author", "institution", "paper"]:
fname = os.path.join(output_dir, "{}.txt".format(ntype))
with open(fname, "r") as f:
header = f.readline().rstrip()
assert isinstance(int(header), int)
# Step2: data dispatch
partition_dir = os.path.join(root_dir, 'parted_data')
out_dir = os.path.join(root_dir, 'partitioned')
ip_config = os.path.join(root_dir, 'ip_config.txt')
with open(ip_config, 'w') as f:
partition_dir = os.path.join(root_dir, "parted_data")
out_dir = os.path.join(root_dir, "partitioned")
ip_config = os.path.join(root_dir, "ip_config.txt")
with open(ip_config, "w") as f:
for i in range(num_parts):
f.write(f'127.0.0.{i + 1}\n')
cmd = 'python3 tools/dispatch_data.py'
cmd += f' --in-dir {in_dir}'
cmd += f' --partitions-dir {partition_dir}'
cmd += f' --out-dir {out_dir}'
cmd += f' --ip-config {ip_config}'
cmd += ' --process-group-timeout 60'
cmd += ' --save-orig-nids'
cmd += ' --save-orig-eids'
f.write(f"127.0.0.{i + 1}\n")
cmd = "python3 tools/dispatch_data.py"
cmd += f" --in-dir {in_dir}"
cmd += f" --partitions-dir {partition_dir}"
cmd += f" --out-dir {out_dir}"
cmd += f" --ip-config {ip_config}"
cmd += " --process-group-timeout 60"
cmd += " --save-orig-nids"
cmd += " --save-orig-eids"
os.system(cmd)
# check metadata.json
meta_fname = os.path.join(out_dir, 'metadata.json')
with open(meta_fname, 'rb') as f:
meta_fname = os.path.join(out_dir, "metadata.json")
with open(meta_fname, "rb") as f:
meta_data = json.load(f)
for etype in all_etypes:
assert len(meta_data['edge_map'][etype]) == num_parts
assert meta_data['etypes'].keys() == set(all_etypes)
assert meta_data['graph_name'] == 'mag240m'
assert len(meta_data["edge_map"][etype]) == num_parts
assert meta_data["etypes"].keys() == set(all_etypes)
assert meta_data["graph_name"] == "mag240m"
for ntype in all_ntypes:
assert len(meta_data['node_map'][ntype]) == num_parts
assert meta_data['ntypes'].keys() == set(all_ntypes)
assert meta_data['num_edges'] == g.num_edges()
assert meta_data['num_nodes'] == g.num_nodes()
assert meta_data['num_parts'] == num_parts
assert len(meta_data["node_map"][ntype]) == num_parts
assert meta_data["ntypes"].keys() == set(all_ntypes)
assert meta_data["num_edges"] == g.num_edges()
assert meta_data["num_nodes"] == g.num_nodes()
assert meta_data["num_parts"] == num_parts
edge_dict = {}
edge_data_gold = {}
@@ -165,7 +166,7 @@ def test_part_pipeline(num_chunks, num_parts):
# Create Id Map here.
num_edges = 0
for utype, etype, vtype in g.canonical_etypes:
fname = ':'.join([utype, etype, vtype])
fname = ":".join([utype, etype, vtype])
edge_dict[fname] = np.array(
[num_edges, num_edges + g.number_of_edges(etype)]
).reshape(1, 2)
@@ -177,21 +178,21 @@ def test_part_pipeline(num_chunks, num_parts):
# check edge_data
num_edges = {
'paper:cites:paper': num_cite_edges,
'author:writes:paper': num_write_edges,
'paper:rev_writes:author': num_write_edges,
"paper:cites:paper": num_cite_edges,
"author:writes:paper": num_write_edges,
"paper:rev_writes:author": num_write_edges,
}
output_dir = os.path.join(root_dir, 'chunked-data')
output_edge_data_dir = os.path.join(output_dir, 'edge_data')
output_dir = os.path.join(root_dir, "chunked-data")
output_edge_data_dir = os.path.join(output_dir, "edge_data")
for etype, feat in [
['paper:cites:paper', 'count'],
['author:writes:paper', 'year'],
['paper:rev_writes:author', 'year'],
["paper:cites:paper", "count"],
["author:writes:paper", "year"],
["paper:rev_writes:author", "year"],
]:
output_edge_sub_dir = os.path.join(output_edge_data_dir, etype)
features = []
for i in range(num_chunks):
chunk_f_name = '{}-{}.npy'.format(feat, i)
chunk_f_name = "{}-{}.npy".format(feat, i)
chunk_f_name = os.path.join(
output_edge_sub_dir, chunk_f_name
)
@@ -199,54 +200,54 @@ def test_part_pipeline(num_chunks, num_parts):
feat_array = np.load(chunk_f_name)
assert feat_array.shape[0] == num_edges[etype] // num_chunks
features.append(feat_array)
edge_data_gold[etype + '/' + feat] = np.concatenate(features)
edge_data_gold[etype + "/" + feat] = np.concatenate(features)
for i in range(num_parts):
sub_dir = 'part-' + str(i)
sub_dir = "part-" + str(i)
assert meta_data[sub_dir][
'node_feats'
] == 'part{}/node_feat.dgl'.format(i)
"node_feats"
] == "part{}/node_feat.dgl".format(i)
assert meta_data[sub_dir][
'edge_feats'
] == 'part{}/edge_feat.dgl'.format(i)
"edge_feats"
] == "part{}/edge_feat.dgl".format(i)
assert meta_data[sub_dir][
'part_graph'
] == 'part{}/graph.dgl'.format(i)
"part_graph"
] == "part{}/graph.dgl".format(i)
# check data
sub_dir = os.path.join(out_dir, 'part' + str(i))
sub_dir = os.path.join(out_dir, "part" + str(i))
# graph.dgl
fname = os.path.join(sub_dir, 'graph.dgl')
fname = os.path.join(sub_dir, "graph.dgl")
assert os.path.isfile(fname)
g_list, data_dict = load_graphs(fname)
part_g = g_list[0]
assert isinstance(part_g, dgl.DGLGraph)
# node_feat.dgl
fname = os.path.join(sub_dir, 'node_feat.dgl')
fname = os.path.join(sub_dir, "node_feat.dgl")
assert os.path.isfile(fname)
tensor_dict = load_tensors(fname)
all_tensors = [
'paper/feat',
'paper/label',
'paper/year',
'paper/orig_ids',
"paper/feat",
"paper/label",
"paper/year",
"paper/orig_ids",
]
assert tensor_dict.keys() == set(all_tensors)
for key in all_tensors:
assert isinstance(tensor_dict[key], torch.Tensor)
ndata_paper_orig_ids = tensor_dict['paper/orig_ids']
ndata_paper_orig_ids = tensor_dict["paper/orig_ids"]
# orig_nids.dgl
fname = os.path.join(sub_dir, 'orig_nids.dgl')
fname = os.path.join(sub_dir, "orig_nids.dgl")
assert os.path.isfile(fname)
orig_nids = load_tensors(fname)
assert len(orig_nids.keys()) == 3
assert torch.equal(ndata_paper_orig_ids, orig_nids['paper'])
assert torch.equal(ndata_paper_orig_ids, orig_nids["paper"])
# orig_eids.dgl
fname = os.path.join(sub_dir, 'orig_eids.dgl')
fname = os.path.join(sub_dir, "orig_eids.dgl")
assert os.path.isfile(fname)
orig_eids = load_tensors(fname)
assert len(orig_eids.keys()) == 4
@@ -254,13 +255,13 @@ def test_part_pipeline(num_chunks, num_parts):
if include_edge_data:
# Read edge_feat.dgl
fname = os.path.join(sub_dir, 'edge_feat.dgl')
fname = os.path.join(sub_dir, "edge_feat.dgl")
assert os.path.isfile(fname)
tensor_dict = load_tensors(fname)
all_tensors = [
'paper:cites:paper/count',
'author:writes:paper/year',
'paper:rev_writes:author/year',
"paper:cites:paper/count",
"author:writes:paper/year",
"paper:rev_writes:author/year",
]
assert tensor_dict.keys() == set(all_tensors)
for key in all_tensors:
......
import unittest
import json
import tempfile
import os
import tempfile
import unittest
from launch import *
class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
"""wrap_udf_in_torch_dist_launcher()"""
@@ -18,14 +20,18 @@ class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
master_addr="127.0.0.1",
master_port=1234,
)
expected = "python3.7 -m torch.distributed.launch " \
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 " \
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
expected = (
"python3.7 -m torch.distributed.launch "
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
)
self.assertEqual(wrapped_udf_command, expected)
def test_chained_udf(self):
# test that a chained udf_command is properly handled
udf_command = "cd path/to && python3.7 path/to/some/trainer.py arg1 arg2"
udf_command = (
"cd path/to && python3.7 path/to/some/trainer.py arg1 arg2"
)
wrapped_udf_command = wrap_udf_in_torch_dist_launcher(
udf_command=udf_command,
num_trainers=2,
@@ -34,15 +40,21 @@ class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
master_addr="127.0.0.1",
master_port=1234,
)
expected = "cd path/to && python3.7 -m torch.distributed.launch " \
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 " \
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
expected = (
"cd path/to && python3.7 -m torch.distributed.launch "
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
)
self.assertEqual(wrapped_udf_command, expected)
def test_py_versions(self):
# test that this correctly handles different py versions/binaries
py_binaries = (
"python3.7", "python3.8", "python3.9", "python3", "python"
"python3.7",
"python3.8",
"python3.9",
"python3",
"python",
)
udf_command = "{python_bin} path/to/some/trainer.py arg1 arg2"
@@ -55,9 +67,13 @@ class TestWrapUdfInTorchDistLauncher(unittest.TestCase):
master_addr="127.0.0.1",
master_port=1234,
)
expected = "{python_bin} -m torch.distributed.launch ".format(python_bin=py_bin) + \
"--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 " \
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
expected = (
"{python_bin} -m torch.distributed.launch ".format(
python_bin=py_bin
)
+ "--nproc_per_node=2 --nnodes=2 --node_rank=1 --master_addr=127.0.0.1 "
"--master_port=1234 path/to/some/trainer.py arg1 arg2"
)
self.assertEqual(wrapped_udf_command, expected)
@@ -67,12 +83,13 @@ class TestWrapCmdWithLocalEnvvars(unittest.TestCase):
def test_simple(self):
self.assertEqual(
wrap_cmd_with_local_envvars("ls && pwd", "VAR1=value1 VAR2=value2"),
"(export VAR1=value1 VAR2=value2; ls && pwd)"
"(export VAR1=value1 VAR2=value2; ls && pwd)",
)
class TestConstructDglServerEnvVars(unittest.TestCase):
"""construct_dgl_server_env_vars()"""
def test_simple(self):
self.assertEqual(
construct_dgl_server_env_vars(
@@ -83,7 +100,7 @@ class TestConstructDglServerEnvVars(unittest.TestCase):
ip_config="path/to/ip.config",
num_servers=5,
graph_format="csc",
keep_alive=False
keep_alive=False,
),
(
"DGL_ROLE=server "
@@ -95,12 +112,13 @@ class TestConstructDglServerEnvVars(unittest.TestCase):
"DGL_NUM_SERVER=5 "
"DGL_GRAPH_FORMAT=csc "
"DGL_KEEP_ALIVE=0 "
)
),
)
class TestConstructDglClientEnvVars(unittest.TestCase):
"""construct_dgl_client_env_vars()"""
def test_simple(self):
# with pythonpath
self.assertEqual(
@@ -113,7 +131,7 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
graph_format="csc",
num_omp_threads=4,
group_id=0,
pythonpath="some/pythonpath/"
pythonpath="some/pythonpath/",
),
(
"DGL_DIST_MODE=distributed "
@@ -127,7 +145,7 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
"OMP_NUM_THREADS=4 "
"DGL_GROUP_ID=0 "
"PYTHONPATH=some/pythonpath/ "
)
),
)
# without pythonpath
self.assertEqual(
@@ -139,7 +157,7 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
num_servers=3,
graph_format="csc",
num_omp_threads=4,
group_id=0
group_id=0,
),
(
"DGL_DIST_MODE=distributed "
@@ -152,64 +170,72 @@ class TestConstructDglClientEnvVars(unittest.TestCase):
"DGL_GRAPH_FORMAT=csc "
"OMP_NUM_THREADS=4 "
"DGL_GROUP_ID=0 "
)
),
)
def test_submit_jobs():
class Args():
class Args:
pass
args = Args()
with tempfile.TemporaryDirectory() as test_dir:
num_machines = 8
ip_config = os.path.join(test_dir, 'ip_config.txt')
with open(ip_config, 'w') as f:
ip_config = os.path.join(test_dir, "ip_config.txt")
with open(ip_config, "w") as f:
for i in range(num_machines):
f.write('{} {}\n'.format('127.0.0.'+str(i), 30050))
part_config = os.path.join(test_dir, 'ogb-products.json')
with open(part_config, 'w') as f:
json.dump({'num_parts': num_machines}, f)
f.write("{} {}\n".format("127.0.0." + str(i), 30050))
part_config = os.path.join(test_dir, "ogb-products.json")
with open(part_config, "w") as f:
json.dump({"num_parts": num_machines}, f)
args.num_trainers = 8
args.num_samplers = 1
args.num_servers = 4
args.workspace = test_dir
args.part_config = 'ogb-products.json'
args.ip_config = 'ip_config.txt'
args.server_name = 'ogb-products'
args.part_config = "ogb-products.json"
args.ip_config = "ip_config.txt"
args.server_name = "ogb-products"
args.keep_alive = False
args.num_server_threads = 1
args.graph_format = 'csc'
args.graph_format = "csc"
args.extra_envs = ["NCCL_DEBUG=INFO"]
args.num_omp_threads = 1
udf_command = "python3 train_dist.py --num_epochs 10"
clients_cmd, servers_cmd = submit_jobs(args, udf_command, dry_run=True)
def common_checks():
assert 'cd ' + test_dir in cmd
assert 'export ' + args.extra_envs[0] in cmd
assert f'DGL_NUM_SAMPLER={args.num_samplers}' in cmd
assert f'DGL_NUM_CLIENT={args.num_trainers*(args.num_samplers+1)*num_machines}' in cmd
assert f'DGL_CONF_PATH={args.part_config}' in cmd
assert f'DGL_IP_CONFIG={args.ip_config}' in cmd
assert f'DGL_NUM_SERVER={args.num_servers}' in cmd
assert f'DGL_GRAPH_FORMAT={args.graph_format}' in cmd
assert f'OMP_NUM_THREADS={args.num_omp_threads}' in cmd
assert udf_command[len('python3 '):] in cmd
assert "cd " + test_dir in cmd
assert "export " + args.extra_envs[0] in cmd
assert f"DGL_NUM_SAMPLER={args.num_samplers}" in cmd
assert (
f"DGL_NUM_CLIENT={args.num_trainers*(args.num_samplers+1)*num_machines}"
in cmd
)
assert f"DGL_CONF_PATH={args.part_config}" in cmd
assert f"DGL_IP_CONFIG={args.ip_config}" in cmd
assert f"DGL_NUM_SERVER={args.num_servers}" in cmd
assert f"DGL_GRAPH_FORMAT={args.graph_format}" in cmd
assert f"OMP_NUM_THREADS={args.num_omp_threads}" in cmd
assert udf_command[len("python3 ") :] in cmd
for cmd in clients_cmd:
common_checks()
assert 'DGL_DIST_MODE=distributed' in cmd
assert 'DGL_ROLE=client' in cmd
assert 'DGL_GROUP_ID=0' in cmd
assert f'python3 -m torch.distributed.launch --nproc_per_node={args.num_trainers} --nnodes={num_machines}' in cmd
assert '--master_addr=127.0.0' in cmd
assert '--master_port=1234' in cmd
assert "DGL_DIST_MODE=distributed" in cmd
assert "DGL_ROLE=client" in cmd
assert "DGL_GROUP_ID=0" in cmd
assert (
f"python3 -m torch.distributed.launch --nproc_per_node={args.num_trainers} --nnodes={num_machines}"
in cmd
)
assert "--master_addr=127.0.0" in cmd
assert "--master_port=1234" in cmd
for cmd in servers_cmd:
common_checks()
assert 'DGL_ROLE=server' in cmd
assert 'DGL_KEEP_ALIVE=0' in cmd
assert 'DGL_SERVER_ID=' in cmd
assert "DGL_ROLE=server" in cmd
assert "DGL_KEEP_ALIVE=0" in cmd
assert "DGL_SERVER_ID=" in cmd
if __name__ == '__main__':
if __name__ == "__main__":
unittest.main()
# See the __main__ block for usage of chunk_graph().
import pathlib
import json
from contextlib import contextmanager
import logging
import os
import pathlib
from contextlib import contextmanager
import torch
from utils import array_readwriter, setdir
import dgl
from utils import setdir
from utils import array_readwriter
def chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt):
paths = []
@@ -17,31 +17,38 @@ def chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt):
for j, n in enumerate(chunk_sizes):
path = os.path.abspath(path_fmt % j)
arr_chunk = arr[offset:offset + n]
logging.info('Chunking %d-%d' % (offset, offset + n))
arr_chunk = arr[offset : offset + n]
logging.info("Chunking %d-%d" % (offset, offset + n))
array_readwriter.get_array_parser(**fmt_meta).write(path, arr_chunk)
offset += n
paths.append(path)
return paths
def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
# First deal with ndata and edata that are homogeneous (i.e. not a dict-of-dict)
if len(g.ntypes) == 1 and not isinstance(next(iter(ndata_paths.values())), dict):
if len(g.ntypes) == 1 and not isinstance(
next(iter(ndata_paths.values())), dict
):
ndata_paths = {g.ntypes[0]: ndata_paths}
if len(g.etypes) == 1 and not isinstance(next(iter(edata_paths.values())), dict):
if len(g.etypes) == 1 and not isinstance(
next(iter(edata_paths.values())), dict
):
edata_paths = {g.etypes[0]: ndata_paths}
# Then convert all edge types to canonical edge types
etypestrs = {etype: ':'.join(etype) for etype in g.canonical_etypes}
edata_paths = {':'.join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()}
etypestrs = {etype: ":".join(etype) for etype in g.canonical_etypes}
edata_paths = {
":".join(g.to_canonical_etype(k)): v for k, v in edata_paths.items()
}
metadata = {}
metadata['graph_name'] = name
metadata['node_type'] = g.ntypes
metadata["graph_name"] = name
metadata["node_type"] = g.ntypes
# Compute the number of nodes per chunk per node type
metadata['num_nodes_per_chunk'] = num_nodes_per_chunk = []
metadata["num_nodes_per_chunk"] = num_nodes_per_chunk = []
for ntype in g.ntypes:
num_nodes = g.num_nodes(ntype)
num_nodes_list = []
@@ -49,12 +56,14 @@ def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
n = num_nodes // num_chunks + (i < num_nodes % num_chunks)
num_nodes_list.append(n)
num_nodes_per_chunk.append(num_nodes_list)
num_nodes_per_chunk_dict = {k: v for k, v in zip(g.ntypes, num_nodes_per_chunk)}
num_nodes_per_chunk_dict = {
k: v for k, v in zip(g.ntypes, num_nodes_per_chunk)
}
metadata['edge_type'] = [etypestrs[etype] for etype in g.canonical_etypes]
metadata["edge_type"] = [etypestrs[etype] for etype in g.canonical_etypes]
# Compute the number of edges per chunk per edge type
metadata['num_edges_per_chunk'] = num_edges_per_chunk = []
metadata["num_edges_per_chunk"] = num_edges_per_chunk = []
for etype in g.canonical_etypes:
num_edges = g.num_edges(etype)
num_edges_list = []
@@ -62,67 +71,88 @@ def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
n = num_edges // num_chunks + (i < num_edges % num_chunks)
num_edges_list.append(n)
num_edges_per_chunk.append(num_edges_list)
num_edges_per_chunk_dict = {k: v for k, v in zip(g.canonical_etypes, num_edges_per_chunk)}
num_edges_per_chunk_dict = {
k: v for k, v in zip(g.canonical_etypes, num_edges_per_chunk)
}
# Split edge index
metadata['edges'] = {}
with setdir('edge_index'):
metadata["edges"] = {}
with setdir("edge_index"):
for etype in g.canonical_etypes:
etypestr = etypestrs[etype]
logging.info('Chunking edge index for %s' % etypestr)
logging.info("Chunking edge index for %s" % etypestr)
edges_meta = {}
fmt_meta = {"name": "csv", "delimiter": " "}
edges_meta['format'] = fmt_meta
edges_meta["format"] = fmt_meta
srcdst = torch.stack(g.edges(etype=etype), 1)
edges_meta['data'] = chunk_numpy_array(
srcdst.numpy(), fmt_meta, num_edges_per_chunk_dict[etype],
etypestr + '%d.txt')
metadata['edges'][etypestr] = edges_meta
edges_meta["data"] = chunk_numpy_array(
srcdst.numpy(),
fmt_meta,
num_edges_per_chunk_dict[etype],
etypestr + "%d.txt",
)
metadata["edges"][etypestr] = edges_meta
# Chunk node data
metadata['node_data'] = {}
with setdir('node_data'):
metadata["node_data"] = {}
with setdir("node_data"):
for ntype, ndata_per_type in ndata_paths.items():
ndata_meta = {}
with setdir(ntype):
for key, path in ndata_per_type.items():
logging.info('Chunking node data for type %s key %s' % (ntype, key))
logging.info(
"Chunking node data for type %s key %s" % (ntype, key)
)
ndata_key_meta = {}
reader_fmt_meta = writer_fmt_meta = {"name": "numpy"}
arr = array_readwriter.get_array_parser(**reader_fmt_meta).read(path)
ndata_key_meta['format'] = writer_fmt_meta
ndata_key_meta['data'] = chunk_numpy_array(
arr, writer_fmt_meta, num_nodes_per_chunk_dict[ntype],
key + '-%d.npy')
arr = array_readwriter.get_array_parser(
**reader_fmt_meta
).read(path)
ndata_key_meta["format"] = writer_fmt_meta
ndata_key_meta["data"] = chunk_numpy_array(
arr,
writer_fmt_meta,
num_nodes_per_chunk_dict[ntype],
key + "-%d.npy",
)
ndata_meta[key] = ndata_key_meta
metadata['node_data'][ntype] = ndata_meta
metadata["node_data"][ntype] = ndata_meta
# Chunk edge data
metadata['edge_data'] = {}
with setdir('edge_data'):
metadata["edge_data"] = {}
with setdir("edge_data"):
for etypestr, edata_per_type in edata_paths.items():
edata_meta = {}
with setdir(etypestr):
for key, path in edata_per_type.items():
logging.info('Chunking edge data for type %s key %s' % (etypestr, key))
logging.info(
"Chunking edge data for type %s key %s"
% (etypestr, key)
)
edata_key_meta = {}
reader_fmt_meta = writer_fmt_meta = {"name": "numpy"}
arr = array_readwriter.get_array_parser(**reader_fmt_meta).read(path)
edata_key_meta['format'] = writer_fmt_meta
etype = tuple(etypestr.split(':'))
edata_key_meta['data'] = chunk_numpy_array(
arr, writer_fmt_meta, num_edges_per_chunk_dict[etype],
key + '-%d.npy')
arr = array_readwriter.get_array_parser(
**reader_fmt_meta
).read(path)
edata_key_meta["format"] = writer_fmt_meta
etype = tuple(etypestr.split(":"))
edata_key_meta["data"] = chunk_numpy_array(
arr,
writer_fmt_meta,
num_edges_per_chunk_dict[etype],
key + "-%d.npy",
)
edata_meta[key] = edata_key_meta
metadata['edge_data'][etypestr] = edata_meta
metadata["edge_data"][etypestr] = edata_meta
metadata_path = 'metadata.json'
with open(metadata_path, 'w') as f:
metadata_path = "metadata.json"
with open(metadata_path, "w") as f:
json.dump(metadata, f)
logging.info('Saved metadata in %s' % os.path.abspath(metadata_path))
logging.info("Saved metadata in %s" % os.path.abspath(metadata_path))
def chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
"""
@@ -157,22 +187,29 @@ def chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path):
with setdir(output_path):
_chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path)
if __name__ == '__main__':
logging.basicConfig(level='INFO')
input_dir = '/data'
output_dir = '/chunked-data'
(g,), _ = dgl.load_graphs(os.path.join(input_dir, 'graph.dgl'))
if __name__ == "__main__":
logging.basicConfig(level="INFO")
input_dir = "/data"
output_dir = "/chunked-data"
(g,), _ = dgl.load_graphs(os.path.join(input_dir, "graph.dgl"))
chunk_graph(
g,
'mag240m',
{'paper': {
'feat': os.path.join(input_dir, 'paper/feat.npy'),
'label': os.path.join(input_dir, 'paper/label.npy'),
'year': os.path.join(input_dir, 'paper/year.npy')}},
{'cites': {'count': os.path.join(input_dir, 'cites/count.npy')},
'writes': {'year': os.path.join(input_dir, 'writes/year.npy')},
# you can put the same data file if they indeed share the features.
'rev_writes': {'year': os.path.join(input_dir, 'writes/year.npy')}},
4,
output_dir)
g,
"mag240m",
{
"paper": {
"feat": os.path.join(input_dir, "paper/feat.npy"),
"label": os.path.join(input_dir, "paper/label.npy"),
"year": os.path.join(input_dir, "paper/year.npy"),
}
},
{
"cites": {"count": os.path.join(input_dir, "cites/count.npy")},
"writes": {"year": os.path.join(input_dir, "writes/year.npy")},
# you can put the same data file if they indeed share the features.
"rev_writes": {"year": os.path.join(input_dir, "writes/year.npy")},
},
4,
output_dir,
)
# The generated metadata goes as in tools/sample-config/mag240m-metadata.json.
"""Copy the partitions to a cluster of machines."""
import argparse
import copy
import json
import logging
import os
import signal
import stat
import sys
import subprocess
import argparse
import signal
import logging
import json
import copy
import sys
def copy_file(file_name, ip, workspace, param=""):
print("copy {} to {}".format(file_name, ip + ":" + workspace + "/"))
cmd = "scp " + param + " " + file_name + " " + ip + ":" + workspace + "/"
subprocess.check_call(cmd, shell=True)
def copy_file(file_name, ip, workspace, param=''):
print('copy {} to {}'.format(file_name, ip + ':' + workspace + '/'))
cmd = 'scp ' + param + ' ' + file_name + ' ' + ip + ':' + workspace + '/'
subprocess.check_call(cmd, shell = True)
def exec_cmd(ip, cmd):
cmd = 'ssh -o StrictHostKeyChecking=no ' + ip + ' \'' + cmd + '\''
subprocess.check_call(cmd, shell = True)
cmd = "ssh -o StrictHostKeyChecking=no " + ip + " '" + cmd + "'"
subprocess.check_call(cmd, shell=True)
def main():
parser = argparse.ArgumentParser(description='Copy data to the servers.')
parser.add_argument('--workspace', type=str, required=True,
help='Path of user directory of distributed tasks. \
parser = argparse.ArgumentParser(description="Copy data to the servers.")
parser.add_argument(
"--workspace",
type=str,
required=True,
help="Path of user directory of distributed tasks. \
This is used to specify a destination location where \
data are copied to on remote machines.')
parser.add_argument('--rel_data_path', type=str, required=True,
help='Relative path in workspace to store the partition data.')
parser.add_argument('--part_config', type=str, required=True,
help='The partition config file. The path is on the local machine.')
parser.add_argument('--script_folder', type=str, required=True,
help='The folder contains all the user code scripts.')
parser.add_argument('--ip_config', type=str, required=True,
help='The file of IP configuration for servers. \
The path is on the local machine.')
data are copied to on remote machines.",
)
parser.add_argument(
"--rel_data_path",
type=str,
required=True,
help="Relative path in workspace to store the partition data.",
)
parser.add_argument(
"--part_config",
type=str,
required=True,
help="The partition config file. The path is on the local machine.",
)
parser.add_argument(
"--script_folder",
type=str,
required=True,
help="The folder contains all the user code scripts.",
)
parser.add_argument(
"--ip_config",
type=str,
required=True,
help="The file of IP configuration for servers. \
The path is on the local machine.",
)
args = parser.parse_args()
hosts = []
with open(args.ip_config) as f:
for line in f:
res = line.strip().split(' ')
res = line.strip().split(" ")
ip = res[0]
hosts.append(ip)
# We need to update the partition config file so that the paths are relative to
# the workspace in the remote machines.
with open(args.part_config) as conf_f:
part_metadata = json.load(conf_f)
tmp_part_metadata = copy.deepcopy(part_metadata)
num_parts = part_metadata['num_parts']
assert num_parts == len(hosts), \
'The number of partitions needs to be the same as the number of hosts.'
graph_name = part_metadata['graph_name']
node_map = part_metadata['node_map']
edge_map = part_metadata['edge_map']
num_parts = part_metadata["num_parts"]
assert num_parts == len(
hosts
), "The number of partitions needs to be the same as the number of hosts."
graph_name = part_metadata["graph_name"]
node_map = part_metadata["node_map"]
edge_map = part_metadata["edge_map"]
if not isinstance(node_map, dict):
assert node_map[-4:] == '.npy', 'node map should be stored in a NumPy array.'
tmp_part_metadata['node_map'] = '{}/{}/node_map.npy'.format(args.workspace,
args.rel_data_path)
assert (
node_map[-4:] == ".npy"
), "node map should be stored in a NumPy array."
tmp_part_metadata["node_map"] = "{}/{}/node_map.npy".format(
args.workspace, args.rel_data_path
)
if not isinstance(edge_map, dict):
assert edge_map[-4:] == '.npy', 'edge map should be stored in a NumPy array.'
tmp_part_metadata['edge_map'] = '{}/{}/edge_map.npy'.format(args.workspace,
args.rel_data_path)
assert (
edge_map[-4:] == ".npy"
), "edge map should be stored in a NumPy array."
tmp_part_metadata["edge_map"] = "{}/{}/edge_map.npy".format(
args.workspace, args.rel_data_path
)
for part_id in range(num_parts):
part_files = tmp_part_metadata['part-{}'.format(part_id)]
part_files['edge_feats'] = '{}/part{}/edge_feat.dgl'.format(args.rel_data_path, part_id)
part_files['node_feats'] = '{}/part{}/node_feat.dgl'.format(args.rel_data_path, part_id)
part_files['part_graph'] = '{}/part{}/graph.dgl'.format(args.rel_data_path, part_id)
tmp_part_config = '/tmp/{}.json'.format(graph_name)
with open(tmp_part_config, 'w') as outfile:
part_files = tmp_part_metadata["part-{}".format(part_id)]
part_files["edge_feats"] = "{}/part{}/edge_feat.dgl".format(
args.rel_data_path, part_id
)
part_files["node_feats"] = "{}/part{}/node_feat.dgl".format(
args.rel_data_path, part_id
)
part_files["part_graph"] = "{}/part{}/graph.dgl".format(
args.rel_data_path, part_id
)
tmp_part_config = "/tmp/{}.json".format(graph_name)
with open(tmp_part_config, "w") as outfile:
json.dump(tmp_part_metadata, outfile, sort_keys=True, indent=4)
# Copy ip config.
for part_id, ip in enumerate(hosts):
remote_path = '{}/{}'.format(args.workspace, args.rel_data_path)
exec_cmd(ip, 'mkdir -p {}'.format(remote_path))
remote_path = "{}/{}".format(args.workspace, args.rel_data_path)
exec_cmd(ip, "mkdir -p {}".format(remote_path))
copy_file(args.ip_config, ip, args.workspace)
copy_file(tmp_part_config, ip, '{}/{}'.format(args.workspace, args.rel_data_path))
node_map = part_metadata['node_map']
edge_map = part_metadata['edge_map']
copy_file(
tmp_part_config,
ip,
"{}/{}".format(args.workspace, args.rel_data_path),
)
node_map = part_metadata["node_map"]
edge_map = part_metadata["edge_map"]
if not isinstance(node_map, dict):
copy_file(node_map, ip, tmp_part_metadata['node_map'])
copy_file(node_map, ip, tmp_part_metadata["node_map"])
if not isinstance(edge_map, dict):
copy_file(edge_map, ip, tmp_part_metadata['edge_map'])
remote_path = '{}/{}/part{}'.format(args.workspace, args.rel_data_path, part_id)
exec_cmd(ip, 'mkdir -p {}'.format(remote_path))
part_files = part_metadata['part-{}'.format(part_id)]
copy_file(part_files['node_feats'], ip, remote_path)
copy_file(part_files['edge_feats'], ip, remote_path)
copy_file(part_files['part_graph'], ip, remote_path)
copy_file(edge_map, ip, tmp_part_metadata["edge_map"])
remote_path = "{}/{}/part{}".format(
args.workspace, args.rel_data_path, part_id
)
exec_cmd(ip, "mkdir -p {}".format(remote_path))
part_files = part_metadata["part-{}".format(part_id)]
copy_file(part_files["node_feats"], ip, remote_path)
copy_file(part_files["edge_feats"], ip, remote_path)
copy_file(part_files["part_graph"], ip, remote_path)
# copy script folder
copy_file(args.script_folder, ip, args.workspace, '-r')
copy_file(args.script_folder, ip, args.workspace, "-r")
def signal_handler(signal, frame):
logging.info('Stop copying')
logging.info("Stop copying")
sys.exit(0)
if __name__ == '__main__':
fmt = '%(asctime)s %(levelname)s %(message)s'
if __name__ == "__main__":
fmt = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)
signal.signal(signal.SIGINT, signal_handler)
main()
"""Launching distributed graph partitioning pipeline """
import os
import sys
import argparse
import logging
import json
import logging
import os
import sys
from partition_algo.base import load_partition_meta
INSTALL_DIR = os.path.abspath(os.path.join(__file__, '..'))
INSTALL_DIR = os.path.abspath(os.path.join(__file__, ".."))
LAUNCH_SCRIPT = "distgraphlaunch.py"
PIPELINE_SCRIPT = "distpartitioning/data_proc_pipeline.py"
@@ -23,6 +24,7 @@ LARG_IPCONF = "ip_config"
LARG_MASTER_PORT = "master_port"
LARG_SSH_PORT = "ssh_port"
def get_launch_cmd(args) -> str:
cmd = sys.executable + " " + os.path.join(INSTALL_DIR, LAUNCH_SCRIPT)
cmd = f"{cmd} --{LARG_SSH_PORT} {args.ssh_port} "
@@ -34,7 +36,7 @@ def get_launch_cmd(args) -> str:
def submit_jobs(args) -> str:
#read the json file and get the remaining argument here.
# read the json file and get the remaining argument here.
schema_path = "metadata.json"
with open(os.path.join(args.in_dir, schema_path)) as schema:
schema_map = json.load(schema)
@@ -49,17 +51,22 @@ def submit_jobs(args) -> str:
part_meta = load_partition_meta(partition_path)
num_parts = part_meta.num_parts
if num_parts > num_chunks:
raise Exception('Number of partitions should be less/equal than number of chunks.')
raise Exception(
"Number of partitions should be less/equal than number of chunks."
)
# verify ip_config
with open(args.ip_config, 'r') as f:
with open(args.ip_config, "r") as f:
num_ips = len(f.readlines())
assert num_ips == num_parts, \
f'The number of lines[{args.ip_config}] should be equal to num_parts[{num_parts}].'
assert (
num_ips == num_parts
), f"The number of lines[{args.ip_config}] should be equal to num_parts[{num_parts}]."
argslist = ""
argslist += "--world-size {} ".format(num_parts)
argslist += "--partitions-dir {} ".format(os.path.abspath(args.partitions_dir))
argslist += "--partitions-dir {} ".format(
os.path.abspath(args.partitions_dir)
)
argslist += "--input-dir {} ".format(os.path.abspath(args.in_dir))
argslist += "--graph-name {} ".format(graph_name)
argslist += "--schema {} ".format(schema_path)
@@ -75,28 +82,73 @@ def main():
udf_cmd = f"{args.python_path} {pipeline_cmd} {argslist}"
launch_cmd = get_launch_cmd(args)
launch_cmd += '\"'+udf_cmd+'\"'
launch_cmd += '"' + udf_cmd + '"'
print(launch_cmd)
os.system(launch_cmd)
def main():
parser = argparse.ArgumentParser(description='Dispatch edge index and data to partitions', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--in-dir', type=str, help='Location of the input directory where the dataset is located')
parser.add_argument('--partitions-dir', type=str, help='Location of the partition-id mapping files which define node-ids and their respective partition-ids, relative to the input directory')
parser.add_argument('--out-dir', type=str, help='Location of the output directory where the graph partitions will be created by this pipeline')
parser.add_argument('--ip-config', type=str, help='File location of IP configuration for server processes')
parser.add_argument('--master-port', type=int, default=12345, help='port used by gloo group to create randezvous point')
parser.add_argument('--log-level', type=str, default="info", help='To enable log level for debugging purposes. Available options: (Critical, Error, Warning, Info, Debug, Notset)')
parser.add_argument('--python-path', type=str, default=sys.executable, help='Path to the Python executable on all workers')
parser.add_argument('--ssh-port', type=int, default=22, help='SSH Port.')
parser.add_argument('--process-group-timeout', type=int, default=1800,
help='timeout[seconds] for operations executed against the process group')
parser.add_argument('--save-orig-nids', action='store_true',
help='Save original node IDs into files')
parser.add_argument('--save-orig-eids', action='store_true',
help='Save original edge IDs into files')
parser = argparse.ArgumentParser(
description="Dispatch edge index and data to partitions",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
"--in-dir",
type=str,
help="Location of the input directory where the dataset is located",
)
parser.add_argument(
"--partitions-dir",
type=str,
help="Location of the partition-id mapping files which define node-ids and their respective partition-ids, relative to the input directory",
)
parser.add_argument(
"--out-dir",
type=str,
help="Location of the output directory where the graph partitions will be created by this pipeline",
)
parser.add_argument(
"--ip-config",
type=str,
help="File location of IP configuration for server processes",
)
parser.add_argument(
"--master-port",
type=int,
default=12345,
help="port used by gloo group to create randezvous point",
)
parser.add_argument(
"--log-level",
type=str,
default="info",
help="To enable log level for debugging purposes. Available options: (Critical, Error, Warning, Info, Debug, Notset)",
)
parser.add_argument(
"--python-path",
type=str,
default=sys.executable,
help="Path to the Python executable on all workers",
)
parser.add_argument("--ssh-port", type=int, default=22, help="SSH Port.")
parser.add_argument(
"--process-group-timeout",
type=int,
default=1800,
help="timeout[seconds] for operations executed against the process group",
)
parser.add_argument(
"--save-orig-nids",
action="store_true",
help="Save original node IDs into files",
)
parser.add_argument(
"--save-orig-eids",
action="store_true",
help="Save original edge IDs into files",
)
args, udf_command = parser.parse_known_args()
@@ -109,7 +161,8 @@ def main():
tokens = sys.executable.split(os.sep)
submit_jobs(args)
if __name__ == '__main__':
fmt = '%(asctime)s %(levelname)s %(message)s'
if __name__ == "__main__":
fmt = "%(asctime)s %(levelname)s %(message)s"
logging.basicConfig(format=fmt, level=logging.INFO)
main()
# Requires setting PYTHONPATH=${GITROOT}/tools
import argparse
import json
import logging
import sys
import os
import numpy as np
import argparse
import sys
from utils import setdir
from utils import array_readwriter
import numpy as np
from base import PartitionMeta, dump_partition_meta
from utils import array_readwriter, setdir
def _random_partition(metadata, num_parts):
num_nodes_per_type = [sum(_) for _ in metadata['num_nodes_per_chunk']]
ntypes = metadata['node_type']
num_nodes_per_type = [sum(_) for _ in metadata["num_nodes_per_chunk"]]
ntypes = metadata["node_type"]
for ntype, n in zip(ntypes, num_nodes_per_type):
logging.info('Generating partition for node type %s' % ntype)
logging.info("Generating partition for node type %s" % ntype)
parts = np.random.randint(0, num_parts, (n,))
array_readwriter.get_array_parser(name='csv').write(ntype + '.txt', parts)
array_readwriter.get_array_parser(name="csv").write(
ntype + ".txt", parts
)
def random_partition(metadata, num_parts, output_path):
"""
@@ -31,22 +34,28 @@ def random_partition(metadata, num_parts, output_path):
"""
with setdir(output_path):
_random_partition(metadata, num_parts)
part_meta = PartitionMeta(version='1.0.0', num_parts=num_parts, algo_name='random')
dump_partition_meta(part_meta, 'partition_meta.json')
part_meta = PartitionMeta(
version="1.0.0", num_parts=num_parts, algo_name="random"
)
dump_partition_meta(part_meta, "partition_meta.json")
# Run with PYTHONPATH=${GIT_ROOT_DIR}/tools
# where ${GIT_ROOT_DIR} is the directory to the DGL git repository.
if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
'--in_dir', type=str, help='input directory that contains the metadata file')
parser.add_argument(
'--out_dir', type=str, help='output directory')
"--in_dir",
type=str,
help="input directory that contains the metadata file",
)
parser.add_argument("--out_dir", type=str, help="output directory")
parser.add_argument(
'--num_partitions', type=int, help='number of partitions')
logging.basicConfig(level='INFO')
"--num_partitions", type=int, help="number of partitions"
)
logging.basicConfig(level="INFO")
args = parser.parse_args()
with open(os.path.join(args.in_dir, 'metadata.json')) as f:
with open(os.path.join(args.in_dir, "metadata.json")) as f:
metadata = json.load(f)
num_parts = args.num_partitions
random_partition(metadata, num_parts, args.out_dir)
from .files import *
from . import array_readwriter
from .files import *
from .registry import register_array_parser, get_array_parser
from . import csv
from . import numpy_array
from . import csv, numpy_array
from .registry import get_array_parser, register_array_parser
import logging
import pandas as pd
import pyarrow
import pyarrow.csv
from .registry import register_array_parser
@register_array_parser("csv")
class CSVArrayParser(object):
def __init__(self, delimiter=','):
def __init__(self, delimiter=","):
self.delimiter = delimiter
def read(self, path):
logging.info('Reading from %s using CSV format with configuration %s' % (
path, self.__dict__))
logging.info(
"Reading from %s using CSV format with configuration %s"
% (path, self.__dict__)
)
# do not read the first line as header
read_options = pyarrow.csv.ReadOptions(autogenerate_column_names=True)
parse_options = pyarrow.csv.ParseOptions(delimiter=self.delimiter)
arr = pyarrow.csv.read_csv(path, read_options=read_options, parse_options=parse_options)
logging.info('Done reading from %s' % path)
arr = pyarrow.csv.read_csv(
path, read_options=read_options, parse_options=parse_options
)
logging.info("Done reading from %s" % path)
return arr.to_pandas().to_numpy()
def write(self, path, arr):
logging.info('Writing to %s using CSV format with configuration %s' % (
path, self.__dict__))
write_options = pyarrow.csv.WriteOptions(include_header=False, delimiter=self.delimiter)
logging.info(
"Writing to %s using CSV format with configuration %s"
% (path, self.__dict__)
)
write_options = pyarrow.csv.WriteOptions(
include_header=False, delimiter=self.delimiter
)
arr = pyarrow.Table.from_pandas(pd.DataFrame(arr))
pyarrow.csv.write_csv(arr, path, write_options=write_options)
logging.info('Done writing to %s' % path)
logging.info("Done writing to %s" % path)