[DistDGL][Lintrunner]Lintrunner for tools directory (#5261)

* lintrunner patch for gloo_wrapper.py
* lintrunner changes to the tools directory.

[DistDGL][Lintrunner]Lintrunner for tools directory (#5261)
* lintrunner patch for gloo_wrapper.py
* lintrunner changes to the tools directory.
aa42aaeb · kylasa · GitHub · 8b47bad5 · aa42aaeb · aa42aaeb
Unverified Commit aa42aaeb authored Feb 03, 2023 by kylasa Committed by GitHub Feb 03, 2023
18 changed files
--- a/tools/change_etype_to_canonical_etype.py
+++ b/tools/change_etype_to_canonical_etype.py
@@ -4,9 +4,9 @@ import logging
 import os
 import time
-import torch
 import dgl
+import torch
 from dgl._ffi.base import DGLError
 from dgl.data.utils import load_graphs
 from dgl.utils import toindex

--- a/tools/chunk_graph.py
+++ b/tools/chunk_graph.py
@@ -5,11 +5,12 @@ import os
 import pathlib
 from contextlib import contextmanager
+import dgl
 import torch
 from distpartitioning import array_readwriter
 from files import setdir
-import dgl
 def chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt):
    paths = []
@@ -26,7 +27,9 @@ def chunk_numpy_array(arr, fmt_meta, chunk_sizes, path_fmt):
    return paths
-def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt):
+def _chunk_graph(
+    g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt
+):
    # First deal with ndata and edata that are homogeneous (i.e. not a dict-of-dict)
    if len(g.ntypes) == 1 and not isinstance(
        next(iter(ndata_paths.values())), dict
@@ -96,7 +99,7 @@ def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path, dat
    # Chunk node data
    reader_fmt_meta, writer_fmt_meta = {"name": "numpy"}, {"name": data_fmt}
-    file_suffix = 'npy' if data_fmt == 'numpy' else 'parquet'
+    file_suffix = "npy" if data_fmt == "numpy" else "parquet"
    metadata["node_data"] = {}
    with setdir("node_data"):
        for ntype, ndata_per_type in ndata_paths.items():
@@ -154,7 +157,9 @@ def _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path, dat
    logging.info("Saved metadata in %s" % os.path.abspath(metadata_path))
-def chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt='numpy'):
+def chunk_graph(
+    g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt="numpy"
+):
    """
    Split the graph into multiple chunks.
@@ -185,7 +190,9 @@ def chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path, data
        for key in edata.keys():
            edata[key] = os.path.abspath(edata[key])
    with setdir(output_path):
-        _chunk_graph(g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt)
+        _chunk_graph(
+            g, name, ndata_paths, edata_paths, num_chunks, output_path, data_fmt
+        )
 if __name__ == "__main__":

--- a/tools/dispatch_data.py
+++ b/tools/dispatch_data.py
@@ -64,7 +64,9 @@ def submit_jobs(args) -> str:
    argslist = ""
    argslist += "--world-size {} ".format(num_ips)
-    argslist += "--partitions-dir {} ".format(os.path.abspath(args.partitions_dir))
+    argslist += "--partitions-dir {} ".format(
+        os.path.abspath(args.partitions_dir)
+    )
    argslist += "--input-dir {} ".format(os.path.abspath(args.in_dir))
    argslist += "--graph-name {} ".format(graph_name)
    argslist += "--schema {} ".format(schema_path)
@@ -74,7 +76,9 @@ def submit_jobs(args) -> str:
    argslist += "--log-level {} ".format(args.log_level)
    argslist += "--save-orig-nids " if args.save_orig_nids else ""
    argslist += "--save-orig-eids " if args.save_orig_eids else ""
-    argslist += f"--graph-formats {args.graph_formats} " if args.graph_formats else ""
+    argslist += (
+        f"--graph-formats {args.graph_formats} " if args.graph_formats else ""
+    )
    # (BarclayII) Is it safe to assume all the workers have the Python executable at the same path?
    pipeline_cmd = os.path.join(INSTALL_DIR, PIPELINE_SCRIPT)

--- a/tools/distgraphlaunch.py
+++ b/tools/distgraphlaunch.py
 """Launching tool for DGL distributed training"""
-import os
-import stat
-import sys
-import subprocess
 import argparse
-import signal
-import logging
-import time
 import json
+import logging
 import multiprocessing
+import os
 import re
+import signal
+import stat
+import subprocess
+import sys
+import time
 from functools import partial
 from threading import Thread
 from typing import Optional
 DEFAULT_PORT = 30050
 def cleanup_proc(get_all_remote_pids, conn):
-    '''This process tries to clean up the remote training tasks.
+    """This process tries to clean up the remote training tasks."""
-    '''
+    print("cleanupu process runs")
-    print('cleanupu process runs')
    # This process should not handle SIGINT.
    signal.signal(signal.SIGINT, signal.SIG_IGN)
    data = conn.recv()
    # If the launch process exits normally, this process doesn't need to do anything.
-    if data == 'exit':
+    if data == "exit":
        sys.exit(0)
    else:
        remote_pids = get_all_remote_pids()
        # Otherwise, we need to ssh to each machine and kill the training jobs.
        for (ip, port), pids in remote_pids.items():
            kill_process(ip, port, pids)
-    print('cleanup process exits')
+    print("cleanup process exits")
 def kill_process(ip, port, pids):
-    '''ssh to a remote machine and kill the specified processes.
+    """ssh to a remote machine and kill the specified processes."""
-    '''
    curr_pid = os.getpid()
    killed_pids = []
    # If we kill child processes first, the parent process may create more again. This happens
@@ -44,8 +44,14 @@ def kill_process(ip, port, pids):
    pids.sort()
    for pid in pids:
        assert curr_pid != pid
-        print('kill process {} on {}:{}'.format(pid, ip, port), flush=True)
+        print("kill process {} on {}:{}".format(pid, ip, port), flush=True)
-        kill_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'kill {}\''.format(pid)
+        kill_cmd = (
+            "ssh -o StrictHostKeyChecking=no -p "
+            + str(port)
+            + " "
+            + ip
+            + " 'kill {}'".format(pid)
+        )
        subprocess.run(kill_cmd, shell=True)
        killed_pids.append(pid)
    # It's possible that some of the processes are not killed. Let's try again.
@@ -56,29 +62,41 @@ def kill_process(ip, port, pids):
        else:
            killed_pids.sort()
            for pid in killed_pids:
-                print('kill process {} on {}:{}'.format(pid, ip, port), flush=True)
+                print(
-                kill_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'kill -9 {}\''.format(pid)
+                    "kill process {} on {}:{}".format(pid, ip, port), flush=True
+                )
+                kill_cmd = (
+                    "ssh -o StrictHostKeyChecking=no -p "
+                    + str(port)
+                    + " "
+                    + ip
+                    + " 'kill -9 {}'".format(pid)
+                )
                subprocess.run(kill_cmd, shell=True)
 def get_killed_pids(ip, port, killed_pids):
-    '''Get the process IDs that we want to kill but are still alive.
+    """Get the process IDs that we want to kill but are still alive."""
-    '''
    killed_pids = [str(pid) for pid in killed_pids]
-    killed_pids = ','.join(killed_pids)
+    killed_pids = ",".join(killed_pids)
-    ps_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'ps -p {} -h\''.format(killed_pids)
+    ps_cmd = (
+        "ssh -o StrictHostKeyChecking=no -p "
+        + str(port)
+        + " "
+        + ip
+        + " 'ps -p {} -h'".format(killed_pids)
+    )
    res = subprocess.run(ps_cmd, shell=True, stdout=subprocess.PIPE)
    pids = []
-    for p in res.stdout.decode('utf-8').split('\n'):
+    for p in res.stdout.decode("utf-8").split("\n"):
        l = p.split()
        if len(l) > 0:
            pids.append(int(l[0]))
    return pids
 def execute_remote(
-    cmd: str,
+    cmd: str, ip: str, port: int, username: Optional[str] = ""
-    ip: str,
-    port: int,
-    username: Optional[str] = ""
 ) -> Thread:
    """Execute command line on remote machine via ssh.
@@ -115,15 +133,21 @@ def execute_remote(
    thread.start()
    return thread
 def get_remote_pids(ip, port, cmd_regex):
-    """Get the process IDs that run the command in the remote machine.
+    """Get the process IDs that run the command in the remote machine."""
-    """
    pids = []
    curr_pid = os.getpid()
    # Here we want to get the python processes. We may get some ssh processes, so we should filter them out.
-    ps_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'ps -aux | grep python | grep -v StrictHostKeyChecking\''
+    ps_cmd = (
+        "ssh -o StrictHostKeyChecking=no -p "
+        + str(port)
+        + " "
+        + ip
+        + " 'ps -aux | grep python | grep -v StrictHostKeyChecking'"
+    )
    res = subprocess.run(ps_cmd, shell=True, stdout=subprocess.PIPE)
-    for p in res.stdout.decode('utf-8').split('\n'):
+    for p in res.stdout.decode("utf-8").split("\n"):
        l = p.split()
        if len(l) < 2:
            continue
@@ -132,28 +156,34 @@ def get_remote_pids(ip, port, cmd_regex):
        if res is not None and int(l[1]) != curr_pid:
            pids.append(l[1])
-    pid_str = ','.join([str(pid) for pid in pids])
+    pid_str = ",".join([str(pid) for pid in pids])
-    ps_cmd = 'ssh -o StrictHostKeyChecking=no -p ' + str(port) + ' ' + ip + ' \'pgrep -P {}\''.format(pid_str)
+    ps_cmd = (
+        "ssh -o StrictHostKeyChecking=no -p "
+        + str(port)
+        + " "
+        + ip
+        + " 'pgrep -P {}'".format(pid_str)
+    )
    res = subprocess.run(ps_cmd, shell=True, stdout=subprocess.PIPE)
-    pids1 = res.stdout.decode('utf-8').split('\n')
+    pids1 = res.stdout.decode("utf-8").split("\n")
    all_pids = []
    for pid in set(pids + pids1):
-        if pid == '' or int(pid) == curr_pid:
+        if pid == "" or int(pid) == curr_pid:
            continue
        all_pids.append(int(pid))
    all_pids.sort()
    return all_pids
 def get_all_remote_pids(hosts, ssh_port, udf_command):
-    '''Get all remote processes.
+    """Get all remote processes."""
-    '''
    remote_pids = {}
    for node_id, host in enumerate(hosts):
        ip, _ = host
        # When creating training processes in remote machines, we may insert some arguments
        # in the commands. We need to use regular expressions to match the modified command.
        cmds = udf_command.split()
-        new_udf_command = ' .*'.join(cmds)
+        new_udf_command = " .*".join(cmds)
        pids = get_remote_pids(ip, ssh_port, new_udf_command)
        remote_pids[(ip, ssh_port)] = pids
    return remote_pids
@@ -164,7 +194,7 @@ def construct_torch_dist_launcher_cmd(
    num_nodes: int,
    node_rank: int,
    master_addr: str,
-    master_port: int
+    master_port: int,
 ) -> str:
    """Constructs the torch distributed launcher command.
    Helper function.
@@ -179,18 +209,20 @@ def construct_torch_dist_launcher_cmd(
    Returns:
        cmd_str.
    """
-    torch_cmd_template = "-m torch.distributed.launch " \
+    torch_cmd_template = (
-                         "--nproc_per_node={nproc_per_node} " \
+        "-m torch.distributed.launch "
-                         "--nnodes={nnodes} " \
+        "--nproc_per_node={nproc_per_node} "
-                         "--node_rank={node_rank} " \
+        "--nnodes={nnodes} "
-                         "--master_addr={master_addr} " \
+        "--node_rank={node_rank} "
+        "--master_addr={master_addr} "
        "--master_port={master_port}"
+    )
    return torch_cmd_template.format(
        nproc_per_node=num_trainers,
        nnodes=num_nodes,
        node_rank=node_rank,
        master_addr=master_addr,
-        master_port=master_port
+        master_port=master_port,
    )
@@ -233,7 +265,7 @@ def wrap_udf_in_torch_dist_launcher(
        num_nodes=num_nodes,
        node_rank=node_rank,
        master_addr=master_addr,
-        master_port=master_port
+        master_port=master_port,
    )
    # Auto-detect the python binary that kicks off the distributed trainer code.
    # Note: This allowlist order matters, this will match with the FIRST matching entry. Thus, please add names to this
@@ -241,9 +273,14 @@ def wrap_udf_in_torch_dist_launcher(
    #           (python3.7, python3.8) -> (python3)
    # The allowed python versions are from this: https://www.dgl.ai/pages/start.html
    python_bin_allowlist = (
-        "python3.6", "python3.7", "python3.8", "python3.9", "python3",
+        "python3.6",
+        "python3.7",
+        "python3.8",
+        "python3.9",
+        "python3",
        # for backwards compatibility, accept python2 but technically DGL is a py3 library, so this is not recommended
-        "python2.7", "python2",
+        "python2.7",
+        "python2",
    )
    # If none of the candidate python bins match, then we go with the default `python`
    python_bin = "python"
@@ -258,7 +295,9 @@ def wrap_udf_in_torch_dist_launcher(
    #     python -m torch.distributed.launch [DIST TORCH ARGS] path/to/dist_trainer.py arg0 arg1
    # Note: if there are multiple python commands in `udf_command`, this may do the Wrong Thing, eg launch each
    #       python command within the torch distributed launcher.
-    new_udf_command = udf_command.replace(python_bin, f"{python_bin} {torch_dist_cmd}")
+    new_udf_command = udf_command.replace(
+        python_bin, f"{python_bin} {torch_dist_cmd}"
+    )
    return new_udf_command
@@ -322,6 +361,7 @@ def wrap_cmd_with_local_envvars(cmd: str, env_vars: str) -> str:
    #     https://stackoverflow.com/a/45993803
    return f"(export {env_vars}; {cmd})"
 def wrap_cmd_with_extra_envvars(cmd: str, env_vars: list) -> str:
    """Wraps a CLI command with extra env vars
@@ -341,6 +381,7 @@ def wrap_cmd_with_extra_envvars(cmd: str, env_vars: list) -> str:
    env_vars = " ".join(env_vars)
    return wrap_cmd_with_local_envvars(cmd, env_vars)
 def submit_jobs(args, udf_command):
    """Submit distributed jobs (server and client processes) via ssh"""
    hosts = []
@@ -348,7 +389,7 @@ def submit_jobs(args, udf_command):
    server_count_per_machine = 0
    # Get the IP addresses of the cluster.
-    #ip_config = os.path.join(args.workspace, args.ip_config)
+    # ip_config = os.path.join(args.workspace, args.ip_config)
    ip_config = args.ip_config
    with open(ip_config) as f:
        for line in f:
@@ -376,58 +417,76 @@ def submit_jobs(args, udf_command):
        server_env_vars_cur = f"{server_env_vars} RANK={i} MASTER_ADDR={hosts[0][0]} MASTER_PORT={args.master_port}"
        cmd = wrap_cmd_with_local_envvars(udf_command, server_env_vars_cur)
        print(cmd)
-        thread_list.append(execute_remote(cmd, ip, args.ssh_port, username=args.ssh_username))
+        thread_list.append(
+            execute_remote(cmd, ip, args.ssh_port, username=args.ssh_username)
+        )
    # Start a cleanup process dedicated for cleaning up remote training jobs.
-    conn1,conn2 = multiprocessing.Pipe()
+    conn1, conn2 = multiprocessing.Pipe()
    func = partial(get_all_remote_pids, hosts, args.ssh_port, udf_command)
    process = multiprocessing.Process(target=cleanup_proc, args=(func, conn1))
    process.start()
    def signal_handler(signal, frame):
-        logging.info('Stop launcher')
+        logging.info("Stop launcher")
        # We need to tell the cleanup process to kill remote training jobs.
-        conn2.send('cleanup')
+        conn2.send("cleanup")
        sys.exit(0)
    signal.signal(signal.SIGINT, signal_handler)
    for thread in thread_list:
        thread.join()
    # The training processes complete. We should tell the cleanup process to exit.
-    conn2.send('exit')
+    conn2.send("exit")
    process.join()
 def main():
-    parser = argparse.ArgumentParser(description='Launch a distributed job')
+    parser = argparse.ArgumentParser(description="Launch a distributed job")
-    parser.add_argument('--ssh_port', type=int, default=22, help='SSH Port.')
+    parser.add_argument("--ssh_port", type=int, default=22, help="SSH Port.")
    parser.add_argument(
-        "--ssh_username", default="",
+        "--ssh_username",
+        default="",
        help="Optional. When issuing commands (via ssh) to cluster, use the provided username in the ssh cmd. "
        "Example: If you provide --ssh_username=bob, then the ssh command will be like: 'ssh bob@1.2.3.4 CMD' "
-             "instead of 'ssh 1.2.3.4 CMD'"
+        "instead of 'ssh 1.2.3.4 CMD'",
+    )
+    parser.add_argument(
+        "--num_proc_per_machine",
+        type=int,
+        help="The number of server processes per machine",
+    )
+    parser.add_argument(
+        "--master_port",
+        type=int,
+        help="This port is used to form gloo group (randevouz server)",
+    )
+    parser.add_argument(
+        "--ip_config",
+        type=str,
+        help="The file (in workspace) of IP configuration for server processes",
    )
-    parser.add_argument('--num_proc_per_machine', type=int,
-                        help='The number of server processes per machine')
-    parser.add_argument('--master_port', type=int,
-                        help='This port is used to form gloo group (randevouz server)')
-    parser.add_argument('--ip_config', type=str,
-                        help='The file (in workspace) of IP configuration for server processes')
    args, udf_command = parser.parse_known_args()
-    assert len(udf_command) == 1, 'Please provide user command line.'
+    assert len(udf_command) == 1, "Please provide user command line."
-    assert args.num_proc_per_machine is not None and args.num_proc_per_machine > 0, \
+    assert (
-            '--num_proc_per_machine must be a positive number.'
+        args.num_proc_per_machine is not None and args.num_proc_per_machine > 0
-    assert args.ip_config is not None, \
+    ), "--num_proc_per_machine must be a positive number."
-            'A user has to specify an IP configuration file with --ip_config.'
+    assert (
+        args.ip_config is not None
+    ), "A user has to specify an IP configuration file with --ip_config."
    udf_command = str(udf_command[0])
-    if 'python' not in udf_command:
+    if "python" not in udf_command:
-        raise RuntimeError("DGL launching script can only support Python executable file.")
+        raise RuntimeError(
+            "DGL launching script can only support Python executable file."
+        )
    submit_jobs(args, udf_command)
-if __name__ == '__main__':
-    fmt = '%(asctime)s %(levelname)s %(message)s'
+if __name__ == "__main__":
+    fmt = "%(asctime)s %(levelname)s %(message)s"
    logging.basicConfig(format=fmt, level=logging.INFO)
    main()
--- a/tools/distpartitioning/array_readwriter/parquet.py
+++ b/tools/distpartitioning/array_readwriter/parquet.py
@@ -30,7 +30,9 @@ class ParquetArrayParser(object):
        # Spark ML feature processing produces single-column parquet files where each row is a vector object
        if len(data_types) == 1 and isinstance(data_types[0], pyarrow.ListType):
            arr = np.array(table.to_pandas().iloc[:, 0].to_list())
-            logging.debug(f"Parquet data under {path} converted from single vector per row to ndarray")
+            logging.debug(
+                f"Parquet data under {path} converted from single vector per row to ndarray"
+            )
        else:
            arr = table.to_pandas().to_numpy()
        if not shape:
@@ -49,8 +51,8 @@ class ParquetArrayParser(object):
            array = array.reshape(shape[0], -1)
        if vector_rows:
            table = pyarrow.table(
-                [pyarrow.array(array.tolist())],
+                [pyarrow.array(array.tolist())], names=["vector"]
-                names=["vector"])
+            )
            logging.info("Writing to %s using single-vector rows..." % path)
        else:
            table = pyarrow.Table.from_pandas(pd.DataFrame(array))

--- a/tools/distpartitioning/constants.py
+++ b/tools/distpartitioning/constants.py
--- a/tools/distpartitioning/convert_partition.py
+++ b/tools/distpartitioning/convert_partition.py
@@ -5,24 +5,31 @@ import logging
 import os
 import time
+import constants
 import dgl
 import numpy as np
 import pandas as pd
 import pyarrow
 import torch as th
-from pyarrow import csv
-import constants
-from utils import get_idranges, memory_snapshot, read_json
 from dgl.distributed.partition import (
-    RESERVED_FIELD_DTYPE,
    _etype_str_to_tuple,
    _etype_tuple_to_str,
+    RESERVED_FIELD_DTYPE,
 )
+from pyarrow import csv
+from utils import get_idranges, memory_snapshot, read_json
-def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
+def create_dgl_object(
-                        return_orig_nids=False, return_orig_eids=False):
+    schema,
+    part_id,
+    node_data,
+    edge_data,
+    edgeid_offset,
+    return_orig_nids=False,
+    return_orig_eids=False,
+):
    """
    This function creates dgl objects for a given graph partition, as in function
    arguments.
@@ -107,12 +114,16 @@ def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
        and value is a 1D tensor mapping between shuffled edge IDs and the original edge
        IDs for each edge type. Otherwise, ``None`` is returned.
    """
-    #create auxiliary data structures from the schema object
+    # create auxiliary data structures from the schema object
    memory_snapshot("CreateDGLObj_Begin", part_id)
-    _, global_nid_ranges = get_idranges(schema[constants.STR_NODE_TYPE],
+    _, global_nid_ranges = get_idranges(
-                                    schema[constants.STR_NUM_NODES_PER_CHUNK])
+        schema[constants.STR_NODE_TYPE],
-    _, global_eid_ranges = get_idranges(schema[constants.STR_EDGE_TYPE],
+        schema[constants.STR_NUM_NODES_PER_CHUNK],
-                                    schema[constants.STR_NUM_EDGES_PER_CHUNK])
+    )
+    _, global_eid_ranges = get_idranges(
+        schema[constants.STR_EDGE_TYPE],
+        schema[constants.STR_NUM_EDGES_PER_CHUNK],
+    )
    id_map = dgl.distributed.id_map.IdMap(global_nid_ranges)
@@ -147,15 +158,15 @@ def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
    assert np.all(shuffle_global_nids[1:] - shuffle_global_nids[:-1] == 1)
    shuffle_global_nid_range = (shuffle_global_nids[0], shuffle_global_nids[-1])
    # Determine the node ID ranges of different node types.
    for ntype_name in global_nid_ranges:
        ntype_id = ntypes_map[ntype_name]
        type_nids = shuffle_global_nids[ntype_ids == ntype_id]
        node_map_val[ntype_name].append(
-            [int(type_nids[0]), int(type_nids[-1]) + 1])
+            [int(type_nids[0]), int(type_nids[-1]) + 1]
+        )
-    #process edges
+    # process edges
    memory_snapshot("CreateDGLObj_AssignEdgeData: ", part_id)
    shuffle_global_src_id = edge_data[constants.SHUFFLE_GLOBAL_SRC_ID]
    edge_data.pop(constants.SHUFFLE_GLOBAL_SRC_ID)
@@ -181,18 +192,32 @@ def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
    edge_data.pop(constants.ETYPE_ID)
    edge_data = None
    gc.collect()
-    logging.info(f'There are {len(shuffle_global_src_id)} edges in partition {part_id}')
+    logging.info(
+        f"There are {len(shuffle_global_src_id)} edges in partition {part_id}"
+    )
    # It's not guaranteed that the edges are sorted based on edge type.
    # Let's sort edges and all attributes on the edges.
    if not np.all(np.diff(etype_ids) >= 0):
        sort_idx = np.argsort(etype_ids)
-        shuffle_global_src_id, shuffle_global_dst_id, global_src_id, global_dst_id, global_edge_id, etype_ids = \
+        (
-                shuffle_global_src_id[sort_idx], shuffle_global_dst_id[sort_idx], global_src_id[sort_idx], \
+            shuffle_global_src_id,
-                global_dst_id[sort_idx], global_edge_id[sort_idx], etype_ids[sort_idx]
+            shuffle_global_dst_id,
+            global_src_id,
+            global_dst_id,
+            global_edge_id,
+            etype_ids,
+        ) = (
+            shuffle_global_src_id[sort_idx],
+            shuffle_global_dst_id[sort_idx],
+            global_src_id[sort_idx],
+            global_dst_id[sort_idx],
+            global_edge_id[sort_idx],
+            etype_ids[sort_idx],
+        )
        assert np.all(np.diff(etype_ids) >= 0)
    else:
-        print(f'[Rank: {part_id} Edge data is already sorted !!!')
+        print(f"[Rank: {part_id} Edge data is already sorted !!!")
    # Determine the edge ID range of different edge types.
    edge_id_start = edgeid_offset
@@ -200,8 +225,9 @@ def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
        etype = _etype_str_to_tuple(etype_name)
        assert len(etype) == 3
        etype_id = etypes_map[etype]
-        edge_map_val[etype].append([edge_id_start,
+        edge_map_val[etype].append(
-            edge_id_start + np.sum(etype_ids == etype_id)])
+            [edge_id_start, edge_id_start + np.sum(etype_ids == etype_id)]
+        )
        edge_id_start += np.sum(etype_ids == etype_id)
    memory_snapshot("CreateDGLObj_UniqueNodeIds: ", part_id)
@@ -209,25 +235,38 @@ def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
    # Here the order of nodes is defined by the `np.unique` function
    # node order is as listed in the uniq_ids array
    ids = np.concatenate(
-        [shuffle_global_src_id, shuffle_global_dst_id,
+        [
-            np.arange(shuffle_global_nid_range[0], shuffle_global_nid_range[1] + 1)])
+            shuffle_global_src_id,
+            shuffle_global_dst_id,
+            np.arange(
+                shuffle_global_nid_range[0], shuffle_global_nid_range[1] + 1
+            ),
+        ]
+    )
    uniq_ids, idx, inverse_idx = np.unique(
-        ids, return_index=True, return_inverse=True)
+        ids, return_index=True, return_inverse=True
+    )
    assert len(uniq_ids) == len(idx)
    # We get the edge list with their node IDs mapped to a contiguous ID range.
-    part_local_src_id, part_local_dst_id = np.split(inverse_idx[:len(shuffle_global_src_id) * 2], 2)
+    part_local_src_id, part_local_dst_id = np.split(
-    inner_nodes = th.as_tensor(np.logical_and(
+        inverse_idx[: len(shuffle_global_src_id) * 2], 2
+    )
+    inner_nodes = th.as_tensor(
+        np.logical_and(
            uniq_ids >= shuffle_global_nid_range[0],
-            uniq_ids <= shuffle_global_nid_range[1]))
+            uniq_ids <= shuffle_global_nid_range[1],
+        )
+    )
-    #get the list of indices, from inner_nodes, which will sort inner_nodes as [True, True, ...., False, False, ...]
+    # get the list of indices, from inner_nodes, which will sort inner_nodes as [True, True, ...., False, False, ...]
-    #essentially local nodes will be placed before non-local nodes.
+    # essentially local nodes will be placed before non-local nodes.
    reshuffle_nodes = th.arange(len(uniq_ids))
-    reshuffle_nodes = th.cat([reshuffle_nodes[inner_nodes.bool()],
+    reshuffle_nodes = th.cat(
-                              reshuffle_nodes[inner_nodes == 0]])
+        [reshuffle_nodes[inner_nodes.bool()], reshuffle_nodes[inner_nodes == 0]]
+    )
-    '''
+    """
    Following procedure is used to map the part_local_src_id, part_local_dst_id to account for
    reshuffling of nodes (to order localy owned nodes prior to non-local nodes in a partition)
    1. Form a node_map, in this case a numpy array, which will be used to map old node-ids (pre-reshuffling)
@@ -261,59 +300,100 @@ def create_dgl_object(schema, part_id, node_data, edge_data, edgeid_offset,
    Since the edge are re-ordered in any way, there is no reordering required for edge related data
    during the DGL object creation.
-    '''
+    """
-    #create the mappings to generate mapped part_local_src_id and part_local_dst_id
+    # create the mappings to generate mapped part_local_src_id and part_local_dst_id
-    #This map will map from unshuffled node-ids to reshuffled-node-ids (which are ordered to prioritize 
+    # This map will map from unshuffled node-ids to reshuffled-node-ids (which are ordered to prioritize
-    #locally owned nodes).
+    # locally owned nodes).
-    nid_map = np.zeros((len(reshuffle_nodes,)))
+    nid_map = np.zeros(
+        (
+            len(
+                reshuffle_nodes,
+            )
+        )
+    )
    nid_map[reshuffle_nodes] = np.arange(len(reshuffle_nodes))
-    #Now map the edge end points to reshuffled_values.
+    # Now map the edge end points to reshuffled_values.
-    part_local_src_id, part_local_dst_id = nid_map[part_local_src_id], nid_map[part_local_dst_id]
+    part_local_src_id, part_local_dst_id = (
+        nid_map[part_local_src_id],
+        nid_map[part_local_dst_id],
+    )
-    #create the graph here now.
+    # create the graph here now.
-    part_graph = dgl.graph(data=(part_local_src_id, part_local_dst_id), num_nodes=len(uniq_ids))
+    part_graph = dgl.graph(
+        data=(part_local_src_id, part_local_dst_id), num_nodes=len(uniq_ids)
+    )
    part_graph.edata[dgl.EID] = th.arange(
-        edgeid_offset, edgeid_offset + part_graph.number_of_edges(), dtype=th.int64)
+        edgeid_offset,
-    part_graph.edata[dgl.ETYPE] = th.as_tensor(etype_ids, dtype=RESERVED_FIELD_DTYPE[dgl.ETYPE])
+        edgeid_offset + part_graph.number_of_edges(),
-    part_graph.edata['inner_edge'] = th.ones(part_graph.number_of_edges(),
+        dtype=th.int64,
-        dtype=RESERVED_FIELD_DTYPE['inner_edge'])
+    )
+    part_graph.edata[dgl.ETYPE] = th.as_tensor(
+        etype_ids, dtype=RESERVED_FIELD_DTYPE[dgl.ETYPE]
-    #compute per_type_ids and ntype for all the nodes in the graph.
+    )
-    global_ids = np.concatenate(
+    part_graph.edata["inner_edge"] = th.ones(
-            [global_src_id, global_dst_id, global_homo_nid])
+        part_graph.number_of_edges(), dtype=RESERVED_FIELD_DTYPE["inner_edge"]
+    )
+    # compute per_type_ids and ntype for all the nodes in the graph.
+    global_ids = np.concatenate([global_src_id, global_dst_id, global_homo_nid])
    part_global_ids = global_ids[idx]
    part_global_ids = part_global_ids[reshuffle_nodes]
    ntype, per_type_ids = id_map(part_global_ids)
-    #continue with the graph creation
+    # continue with the graph creation
-    part_graph.ndata[dgl.NTYPE] = th.as_tensor(ntype, dtype=RESERVED_FIELD_DTYPE[dgl.NTYPE])
+    part_graph.ndata[dgl.NTYPE] = th.as_tensor(
+        ntype, dtype=RESERVED_FIELD_DTYPE[dgl.NTYPE]
+    )
    part_graph.ndata[dgl.NID] = th.as_tensor(uniq_ids[reshuffle_nodes])
-    part_graph.ndata['inner_node'] = th.as_tensor(inner_nodes[reshuffle_nodes],
+    part_graph.ndata["inner_node"] = th.as_tensor(
-        dtype=RESERVED_FIELD_DTYPE['inner_node'])
+        inner_nodes[reshuffle_nodes], dtype=RESERVED_FIELD_DTYPE["inner_node"]
+    )
    orig_nids = None
    orig_eids = None
    if return_orig_nids:
        orig_nids = {}
        for ntype, ntype_id in ntypes_map.items():
-            mask = th.logical_and(part_graph.ndata[dgl.NTYPE] == ntype_id,
+            mask = th.logical_and(
-                                    part_graph.ndata['inner_node'])
+                part_graph.ndata[dgl.NTYPE] == ntype_id,
+                part_graph.ndata["inner_node"],
+            )
            orig_nids[ntype] = th.as_tensor(per_type_ids[mask])
    if return_orig_eids:
        orig_eids = {}
        for etype, etype_id in etypes_map.items():
-            mask = th.logical_and(part_graph.edata[dgl.ETYPE] == etype_id,
+            mask = th.logical_and(
-                                    part_graph.edata['inner_edge'])
+                part_graph.edata[dgl.ETYPE] == etype_id,
-            orig_eids[_etype_tuple_to_str(etype)] = th.as_tensor(global_edge_id[mask])
+                part_graph.edata["inner_edge"],
+            )
+            orig_eids[_etype_tuple_to_str(etype)] = th.as_tensor(
-    return part_graph, node_map_val, edge_map_val, ntypes_map, etypes_map, \
+                global_edge_id[mask]
-        orig_nids, orig_eids
+            )
-def create_metadata_json(graph_name, num_nodes, num_edges, part_id, num_parts, node_map_val, \
+    return (
-                            edge_map_val, ntypes_map, etypes_map, output_dir ):
+        part_graph,
+        node_map_val,
+        edge_map_val,
+        ntypes_map,
+        etypes_map,
+        orig_nids,
+        orig_eids,
+    )
+def create_metadata_json(
+    graph_name,
+    num_nodes,
+    num_edges,
+    part_id,
+    num_parts,
+    node_map_val,
+    edge_map_val,
+    ntypes_map,
+    etypes_map,
+    output_dir,
+):
    """
    Auxiliary function to create json file for the graph partition metadata
@@ -346,22 +426,26 @@ def create_metadata_json(graph_name, num_nodes, num_edges, part_id, num_parts, n
        map describing the graph information
    """
-    part_metadata = {'graph_name': graph_name,
+    part_metadata = {
-                     'num_nodes': num_nodes,
+        "graph_name": graph_name,
-                     'num_edges': num_edges,
+        "num_nodes": num_nodes,
-                     'part_method': 'metis',
+        "num_edges": num_edges,
-                     'num_parts': num_parts,
+        "part_method": "metis",
-                     'halo_hops': 1,
+        "num_parts": num_parts,
-                     'node_map': node_map_val,
+        "halo_hops": 1,
-                     'edge_map': edge_map_val,
+        "node_map": node_map_val,
-                     'ntypes': ntypes_map,
+        "edge_map": edge_map_val,
-                     'etypes': etypes_map}
+        "ntypes": ntypes_map,
+        "etypes": etypes_map,
-    part_dir = 'part' + str(part_id)
+    }
+    part_dir = "part" + str(part_id)
    node_feat_file = os.path.join(part_dir, "node_feat.dgl")
    edge_feat_file = os.path.join(part_dir, "edge_feat.dgl")
    part_graph_file = os.path.join(part_dir, "graph.dgl")
-    part_metadata['part-{}'.format(part_id)] = {'node_feats': node_feat_file,
+    part_metadata["part-{}".format(part_id)] = {
-                                                'edge_feats': edge_feat_file,
+        "node_feats": node_feat_file,
-                                                'part_graph': part_graph_file}
+        "edge_feats": edge_feat_file,
+        "part_graph": part_graph_file,
+    }
    return part_metadata
--- a/tools/distpartitioning/data_proc_pipeline.py
+++ b/tools/distpartitioning/data_proc_pipeline.py
@@ -8,61 +8,104 @@ import torch.multiprocessing as mp
 from data_shuffle import multi_machine_run, single_machine_run
 def log_params(params):
-    """ Print all the command line arguments for debugging purposes.
+    """Print all the command line arguments for debugging purposes.
    Parameters:
    -----------
    params: argparse object
        Argument Parser structure listing all the pre-defined parameters
    """
-    print('Input Dir: ', params.input_dir)
+    print("Input Dir: ", params.input_dir)
-    print('Graph Name: ', params.graph_name)
+    print("Graph Name: ", params.graph_name)
-    print('Schema File: ', params.schema)
+    print("Schema File: ", params.schema)
-    print('No. partitions: ', params.num_parts)
+    print("No. partitions: ", params.num_parts)
-    print('Output Dir: ', params.output)
+    print("Output Dir: ", params.output)
-    print('WorldSize: ', params.world_size)
+    print("WorldSize: ", params.world_size)
-    print('Metis partitions: ', params.partitions_file)
+    print("Metis partitions: ", params.partitions_file)
 if __name__ == "__main__":
    """
    Start of execution from this point.
    Invoke the appropriate function to begin execution
    """
-    #arguments which are already needed by the existing implementation of convert_partition.py
+    # arguments which are already needed by the existing implementation of convert_partition.py
-    parser = argparse.ArgumentParser(description='Construct graph partitions')
+    parser = argparse.ArgumentParser(description="Construct graph partitions")
-    parser.add_argument('--input-dir', required=True, type=str,
+    parser.add_argument(
-                     help='The directory path that contains the partition results.')
+        "--input-dir",
-    parser.add_argument('--graph-name', required=True, type=str,
+        required=True,
-                     help='The graph name')
+        type=str,
-    parser.add_argument('--schema', required=True, type=str,
+        help="The directory path that contains the partition results.",
-                     help='The schema of the graph')
+    )
-    parser.add_argument('--num-parts', required=True, type=int,
+    parser.add_argument(
-                     help='The number of partitions')
+        "--graph-name", required=True, type=str, help="The graph name"
-    parser.add_argument('--output', required=True, type=str,
+    )
-                    help='The output directory of the partitioned results')
+    parser.add_argument(
-    parser.add_argument('--partitions-dir', help='directory of the partition-ids for each node type',
+        "--schema", required=True, type=str, help="The schema of the graph"
-                    default=None, type=str)
+    )
-    parser.add_argument('--log-level', type=str, default="info", 
+    parser.add_argument(
-		    help='To enable log level for debugging purposes. Available options: \
+        "--num-parts", required=True, type=int, help="The number of partitions"
+    )
+    parser.add_argument(
+        "--output",
+        required=True,
+        type=str,
+        help="The output directory of the partitioned results",
+    )
+    parser.add_argument(
+        "--partitions-dir",
+        help="directory of the partition-ids for each node type",
+        default=None,
+        type=str,
+    )
+    parser.add_argument(
+        "--log-level",
+        type=str,
+        default="info",
+        help="To enable log level for debugging purposes. Available options: \
 			  (Critical, Error, Warning, Info, Debug, Notset), default value \
-			  is: Info')
+			  is: Info",
+    )
-    #arguments needed for the distributed implementation
+    # arguments needed for the distributed implementation
-    parser.add_argument('--world-size', help='no. of processes to spawn',
+    parser.add_argument(
-                    default=1, type=int, required=True)
+        "--world-size",
-    parser.add_argument('--process-group-timeout', required=True, type=int,
+        help="no. of processes to spawn",
-                        help='timeout[seconds] for operations executed against the process group '
+        default=1,
-                             '(see torch.distributed.init_process_group)')
+        type=int,
-    parser.add_argument('--save-orig-nids', action='store_true',
+        required=True,
-                        help='Save original node IDs into files')
+    )
-    parser.add_argument('--save-orig-eids', action='store_true',
+    parser.add_argument(
-                        help='Save original edge IDs into files')
+        "--process-group-timeout",
-    parser.add_argument('--graph-formats', default=None, type=str,
+        required=True,
-        help='Save partitions in specified formats.')
+        type=int,
+        help="timeout[seconds] for operations executed against the process group "
+        "(see torch.distributed.init_process_group)",
+    )
+    parser.add_argument(
+        "--save-orig-nids",
+        action="store_true",
+        help="Save original node IDs into files",
+    )
+    parser.add_argument(
+        "--save-orig-eids",
+        action="store_true",
+        help="Save original edge IDs into files",
+    )
+    parser.add_argument(
+        "--graph-formats",
+        default=None,
+        type=str,
+        help="Save partitions in specified formats.",
+    )
    params = parser.parse_args()
-    #invoke the pipeline function
+    # invoke the pipeline function
    numeric_level = getattr(logging, params.log_level.upper(), None)
-    logging.basicConfig(level=numeric_level, format=f"[{platform.node()} %(levelname)s %(asctime)s PID:%(process)d] %(message)s")
+    logging.basicConfig(
+        level=numeric_level,
+        format=f"[{platform.node()} %(levelname)s %(asctime)s PID:%(process)d] %(message)s",
+    )
    multi_machine_run(params)
--- a/tools/distpartitioning/data_shuffle.py
+++ b/tools/distpartitioning/data_shuffle.py
--- a/tools/distpartitioning/dataset_utils.py
+++ b/tools/distpartitioning/dataset_utils.py
+import gc
 import logging
 import os
-import gc
+import array_readwriter
+import constants
 import numpy as np
 import pyarrow
 import pyarrow.parquet as pq
 import torch
 import torch.distributed as dist
-import array_readwriter
-import constants
-from utils import get_idranges, map_partid_rank, generate_read_list
 from gloo_wrapper import alltoallv_cpu
+from utils import generate_read_list, get_idranges, map_partid_rank
 DATA_TYPE_ID = {
-    data_type: id for id, data_type in enumerate([
+    data_type: id
+    for id, data_type in enumerate(
+        [
            torch.float32,
            torch.float64,
            torch.float16,
@@ -25,17 +27,16 @@ DATA_TYPE_ID = {
            torch.int32,
            torch.int64,
            torch.bool,
-    ])
+        ]
+    )
 }
-REV_DATA_TYPE_ID = {
+REV_DATA_TYPE_ID = {id: data_type for data_type, id in DATA_TYPE_ID.items()}
-    id: data_type for data_type, id in DATA_TYPE_ID.items()
-}
 def _shuffle_data(data, rank, world_size, tids, num_parts):
-    '''Each process scatters loaded data to all processes in a group and
+    """Each process scatters loaded data to all processes in a group and
    return gathered data.
    Parameters
@@ -57,14 +58,15 @@ def _shuffle_data(data, rank, world_size, tids, num_parts):
    shuffled_data: tensor
        Shuffled node or edge data.
-    '''
+    """
    # Broadcast basic information of loaded data:
    #   1. number of data lines
    #   2. data dimension
    #   3. data type
-    assert len(data.shape) in [1, 2], (
+    assert len(data.shape) in [
-        f"Data is expected to be 1-D or 2-D but got {data.shape}."
+        1,
-    )
+        2,
+    ], f"Data is expected to be 1-D or 2-D but got {data.shape}."
    data_shape = list(data.shape)
    if len(data_shape) == 1:
        data_shape.append(1)
@@ -177,17 +179,17 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
        tids representing the corresponding edge features.
    """
-    #node features dictionary
+    # node features dictionary
-    #TODO: With the new file format, It is guaranteed that the input dataset will have 
+    # TODO: With the new file format, It is guaranteed that the input dataset will have
-    #no. of nodes with features (node-features) files and nodes metadata will always be the same.
+    # no. of nodes with features (node-features) files and nodes metadata will always be the same.
-    #This means the dimension indicating the no. of nodes in any node-feature files and the no. of
+    # This means the dimension indicating the no. of nodes in any node-feature files and the no. of
-    #nodes in the corresponding nodes metadata file will always be the same. With this guarantee, 
+    # nodes in the corresponding nodes metadata file will always be the same. With this guarantee,
-    #we can eliminate the `node_feature_tids` dictionary since the same information is also populated
+    # we can eliminate the `node_feature_tids` dictionary since the same information is also populated
-    #in the `node_tids` dictionary. This will be removed in the next iteration of code changes.
+    # in the `node_tids` dictionary. This will be removed in the next iteration of code changes.
    node_features = {}
    node_feature_tids = {}
-    '''
+    """
    The structure of the node_data is as follows, which is present in the input metadata json file. 
       "node_data" : {
            "ntype0-name" : {
@@ -244,22 +246,26 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
    which are owned by that particular rank. And using the "num_nodes_per_chunk" information each
    process can easily compute any nodes per-type node_id and global node_id.
    The node-ids are treated as int64's in order to support billions of nodes in the input graph.
-    '''
+    """
-    #read my nodes for each node type
+    # read my nodes for each node type
-    node_tids, ntype_gnid_offset = get_idranges(schema_map[constants.STR_NODE_TYPE], 
+    node_tids, ntype_gnid_offset = get_idranges(
+        schema_map[constants.STR_NODE_TYPE],
        schema_map[constants.STR_NUM_NODES_PER_CHUNK],
-                                    num_chunks=num_parts)
+        num_chunks=num_parts,
+    )
-    #iterate over the "node_data" dictionary in the schema_map
+    # iterate over the "node_data" dictionary in the schema_map
-    #read the node features if exists
+    # read the node features if exists
-    #also keep track of the type_nids for which the node_features are read.
+    # also keep track of the type_nids for which the node_features are read.
    dataset_features = schema_map[constants.STR_NODE_DATA]
-    if((dataset_features is not None) and (len(dataset_features) > 0)):
+    if (dataset_features is not None) and (len(dataset_features) > 0):
        for ntype_name, ntype_feature_data in dataset_features.items():
            for feat_name, feat_data in ntype_feature_data.items():
-                assert (feat_data[constants.STR_FORMAT][constants.STR_NAME]
+                assert feat_data[constants.STR_FORMAT][constants.STR_NAME] in [
-                in [constants.STR_NUMPY, constants.STR_PARQUET])
+                    constants.STR_NUMPY,
+                    constants.STR_PARQUET,
+                ]
                # It is guaranteed that num_chunks is always greater
                # than num_partitions.
                node_data = []
@@ -291,7 +297,8 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
                    rank,
                    world_size,
                    node_tids[ntype_name],
-                    num_parts)
+                    num_parts,
+                )
                # collect data on current rank.
                offset = 0
@@ -307,9 +314,11 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
                        node_feature_tids[data_key] = nfeat_tids
                        offset += end - start
-    #done building node_features locally. 
+    # done building node_features locally.
    if len(node_features) <= 0:
-        logging.info(f'[Rank: {rank}] This dataset does not have any node features')
+        logging.info(
+            f"[Rank: {rank}] This dataset does not have any node features"
+        )
    else:
        assert len(node_features) == len(node_feature_tids)
@@ -319,7 +328,9 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
        #   local_part_id indicates the partition-id, in the context of current
        #   process which take the values 0, 1, 2, ....
        for feat_name, feat_info in node_features.items():
-            logging.info(f'[Rank: {rank}] node feature name: {feat_name}, feature data shape: {feat_info.size()}')
+            logging.info(
+                f"[Rank: {rank}] node feature name: {feat_name}, feature data shape: {feat_info.size()}"
+            )
            tokens = feat_name.split("/")
            assert len(tokens) == 3
@@ -330,10 +341,11 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
            # Iterate over the range of type ids for the current node feature
            # and count the number of features for this feature name.
            count = tids[0][1] - tids[0][0]
-            assert count == feat_info.size()[0], f"{feat_name}, {count} vs {feat_info.size()[0]}."
+            assert (
+                count == feat_info.size()[0]
+            ), f"{feat_name}, {count} vs {feat_info.size()[0]}."
-    '''
+    """
    Reading edge features now.
    The structure of the edge_data is as follows, which is present in the input metadata json file. 
       "edge_data" : {
@@ -369,13 +381,16 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
    Data read from each of the node features file is a multi-dimensional tensor data and is read
    in numpy format, which is also the storage format of node features on the permanent storage.
-    '''
+    """
    edge_features = {}
    edge_feature_tids = {}
    # Read edges for each edge type that are processed by the current process.
-    edge_tids, _ = get_idranges(schema_map[constants.STR_EDGE_TYPE], 
+    edge_tids, _ = get_idranges(
-                                    schema_map[constants.STR_NUM_EDGES_PER_CHUNK], num_parts)
+        schema_map[constants.STR_EDGE_TYPE],
+        schema_map[constants.STR_NUM_EDGES_PER_CHUNK],
+        num_parts,
+    )
    # Iterate over the "edge_data" dictionary in the schema_map.
    # Read the edge features if exists.
@@ -384,8 +399,10 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
    if dataset_features and (len(dataset_features) > 0):
        for etype_name, etype_feature_data in dataset_features.items():
            for feat_name, feat_data in etype_feature_data.items():
-                assert (feat_data[constants.STR_FORMAT][constants.STR_NAME]
+                assert feat_data[constants.STR_FORMAT][constants.STR_NAME] in [
-                    in [constants.STR_NUMPY, constants.STR_PARQUET])
+                    constants.STR_NUMPY,
+                    constants.STR_PARQUET,
+                ]
                edge_data = []
                num_files = len(feat_data[constants.STR_DATA])
@@ -416,7 +433,8 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
                    rank,
                    world_size,
                    edge_tids[etype_name],
-                    num_parts)
+                    num_parts,
+                )
                # collect data on current rank.
                offset = 0
@@ -434,17 +452,21 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
    # Done with building edge_features locally.
    if len(edge_features) <= 0:
-        logging.info(f'[Rank: {rank}] This dataset does not have any edge features')
+        logging.info(
+            f"[Rank: {rank}] This dataset does not have any edge features"
+        )
    else:
        assert len(edge_features) == len(edge_feature_tids)
        for k, v in edge_features.items():
-            logging.info(f'[Rank: {rank}] edge feature name: {k}, feature data shape: {v.shape}')
+            logging.info(
+                f"[Rank: {rank}] edge feature name: {k}, feature data shape: {v.shape}"
+            )
            tids = edge_feature_tids[k]
            count = tids[0][1] - tids[0][0]
            assert count == v.size()[0]
-    '''
+    """
    Code below is used to read edges from the input dataset with the help of the metadata json file
    for the input graph dataset. 
    In the metadata json file, we expect the following key-value pairs to help read the edges of the 
@@ -484,27 +506,33 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
    Each edge file contains two columns representing the source per-type node_ids and destination per-type node_ids
    of any given edge. Since these are node-ids as well they are read in as int64's.
-    '''
+    """
-    #read my edges for each edge type
+    # read my edges for each edge type
    etype_names = schema_map[constants.STR_EDGE_TYPE]
-    etype_name_idmap = {e : idx for idx, e in enumerate(etype_names)}
+    etype_name_idmap = {e: idx for idx, e in enumerate(etype_names)}
-    edge_tids, _ = get_idranges(schema_map[constants.STR_EDGE_TYPE],
+    edge_tids, _ = get_idranges(
+        schema_map[constants.STR_EDGE_TYPE],
        schema_map[constants.STR_NUM_EDGES_PER_CHUNK],
-                    num_chunks=num_parts)
+        num_chunks=num_parts,
+    )
    edge_datadict = {}
    edge_data = schema_map[constants.STR_EDGES]
-    #read the edges files and store this data in memory.
+    # read the edges files and store this data in memory.
-    for col in [constants.GLOBAL_SRC_ID, constants.GLOBAL_DST_ID, \
+    for col in [
-            constants.GLOBAL_TYPE_EID, constants.ETYPE_ID]:
+        constants.GLOBAL_SRC_ID,
+        constants.GLOBAL_DST_ID,
+        constants.GLOBAL_TYPE_EID,
+        constants.ETYPE_ID,
+    ]:
        edge_datadict[col] = []
    for etype_name, etype_info in edge_data.items():
        edge_info = etype_info[constants.STR_DATA]
-        #edgetype strings are in canonical format, src_node_type:edge_type:dst_node_type
+        # edgetype strings are in canonical format, src_node_type:edge_type:dst_node_type
        tokens = etype_name.split(":")
        assert len(tokens) == 3
@@ -525,50 +553,102 @@ def get_dataset(input_dir, graph_name, rank, world_size, num_parts, schema_map):
            edge_file = edge_info[idx]
            if not os.path.isabs(edge_file):
                edge_file = os.path.join(input_dir, edge_file)
-            logging.info(f'Loading edges of etype[{etype_name}] from {edge_file}')
+            logging.info(
+                f"Loading edges of etype[{etype_name}] from {edge_file}"
+            )
-            if etype_info[constants.STR_FORMAT][constants.STR_NAME] == constants.STR_CSV:
+            if (
-                read_options=pyarrow.csv.ReadOptions(use_threads=True, block_size=4096, autogenerate_column_names=True)
+                etype_info[constants.STR_FORMAT][constants.STR_NAME]
-                parse_options=pyarrow.csv.ParseOptions(delimiter=' ')
+                == constants.STR_CSV
-                with pyarrow.csv.open_csv(edge_file, read_options=read_options, parse_options=parse_options) as reader:
+            ):
+                read_options = pyarrow.csv.ReadOptions(
+                    use_threads=True,
+                    block_size=4096,
+                    autogenerate_column_names=True,
+                )
+                parse_options = pyarrow.csv.ParseOptions(delimiter=" ")
+                with pyarrow.csv.open_csv(
+                    edge_file,
+                    read_options=read_options,
+                    parse_options=parse_options,
+                ) as reader:
                    for next_chunk in reader:
                        if next_chunk is None:
                            break
                        next_table = pyarrow.Table.from_batches([next_chunk])
-                        src_ids.append(next_table['f0'].to_numpy())
+                        src_ids.append(next_table["f0"].to_numpy())
-                        dst_ids.append(next_table['f1'].to_numpy())
+                        dst_ids.append(next_table["f1"].to_numpy())
-            elif etype_info[constants.STR_FORMAT][constants.STR_NAME] == constants.STR_PARQUET:
+            elif (
+                etype_info[constants.STR_FORMAT][constants.STR_NAME]
+                == constants.STR_PARQUET
+            ):
                data_df = pq.read_table(edge_file)
                data_df = data_df.rename_columns(["f0", "f1"])
-                src_ids.append(data_df['f0'].to_numpy())
+                src_ids.append(data_df["f0"].to_numpy())
-                dst_ids.append(data_df['f1'].to_numpy())
+                dst_ids.append(data_df["f1"].to_numpy())
            else:
-                raise ValueError(f'Unknown edge format {etype_info[constants.STR_FORMAT][constants.STR_NAME]} for edge type {etype_name}')
+                raise ValueError(
+                    f"Unknown edge format {etype_info[constants.STR_FORMAT][constants.STR_NAME]} for edge type {etype_name}"
+                )
        src_ids = np.concatenate(src_ids)
        dst_ids = np.concatenate(dst_ids)
-        #currently these are just type_edge_ids... which will be converted to global ids
+        # currently these are just type_edge_ids... which will be converted to global ids
-        edge_datadict[constants.GLOBAL_SRC_ID].append(src_ids + ntype_gnid_offset[src_ntype_name][0, 0])
+        edge_datadict[constants.GLOBAL_SRC_ID].append(
-        edge_datadict[constants.GLOBAL_DST_ID].append(dst_ids + ntype_gnid_offset[dst_ntype_name][0, 0])
+            src_ids + ntype_gnid_offset[src_ntype_name][0, 0]
-        edge_datadict[constants.ETYPE_ID].append(etype_name_idmap[etype_name] * \
+        )
-            np.ones(shape=(src_ids.shape), dtype=np.int64))
+        edge_datadict[constants.GLOBAL_DST_ID].append(
+            dst_ids + ntype_gnid_offset[dst_ntype_name][0, 0]
+        )
+        edge_datadict[constants.ETYPE_ID].append(
+            etype_name_idmap[etype_name]
+            * np.ones(shape=(src_ids.shape), dtype=np.int64)
+        )
        for local_part_id in range(num_parts):
-            if (map_partid_rank(local_part_id, world_size) == rank):
+            if map_partid_rank(local_part_id, world_size) == rank:
-                edge_datadict[constants.GLOBAL_TYPE_EID].append(np.arange(edge_tids[etype_name][local_part_id][0],\
+                edge_datadict[constants.GLOBAL_TYPE_EID].append(
-                    edge_tids[etype_name][local_part_id][1] ,dtype=np.int64))
+                    np.arange(
+                        edge_tids[etype_name][local_part_id][0],
+                        edge_tids[etype_name][local_part_id][1],
+                        dtype=np.int64,
+                    )
+                )
-    #stitch together to create the final data on the local machine
+    # stitch together to create the final data on the local machine
-    for col in [constants.GLOBAL_SRC_ID, constants.GLOBAL_DST_ID, constants.GLOBAL_TYPE_EID, constants.ETYPE_ID]:
+    for col in [
+        constants.GLOBAL_SRC_ID,
+        constants.GLOBAL_DST_ID,
+        constants.GLOBAL_TYPE_EID,
+        constants.ETYPE_ID,
+    ]:
        edge_datadict[col] = np.concatenate(edge_datadict[col])
-    assert edge_datadict[constants.GLOBAL_SRC_ID].shape == edge_datadict[constants.GLOBAL_DST_ID].shape
+    assert (
-    assert edge_datadict[constants.GLOBAL_DST_ID].shape == edge_datadict[constants.GLOBAL_TYPE_EID].shape
+        edge_datadict[constants.GLOBAL_SRC_ID].shape
-    assert edge_datadict[constants.GLOBAL_TYPE_EID].shape == edge_datadict[constants.ETYPE_ID].shape
+        == edge_datadict[constants.GLOBAL_DST_ID].shape
-    logging.info(f'[Rank: {rank}] Done reading edge_file: {len(edge_datadict)}, {edge_datadict[constants.GLOBAL_SRC_ID].shape}')
+    )
-    logging.info(f'Rank: {rank} edge_feat_tids: {edge_feature_tids}')
+    assert (
+        edge_datadict[constants.GLOBAL_DST_ID].shape
-    return node_tids, node_features, node_feature_tids, edge_datadict, edge_tids, edge_features, edge_feature_tids
+        == edge_datadict[constants.GLOBAL_TYPE_EID].shape
+    )
+    assert (
+        edge_datadict[constants.GLOBAL_TYPE_EID].shape
+        == edge_datadict[constants.ETYPE_ID].shape
+    )
+    logging.info(
+        f"[Rank: {rank}] Done reading edge_file: {len(edge_datadict)}, {edge_datadict[constants.GLOBAL_SRC_ID].shape}"
+    )
+    logging.info(f"Rank: {rank} edge_feat_tids: {edge_feature_tids}")
+    return (
+        node_tids,
+        node_features,
+        node_feature_tids,
+        edge_datadict,
+        edge_tids,
+        edge_features,
+        edge_feature_tids,
+    )
--- a/tools/distpartitioning/dist_lookup.py
+++ b/tools/distpartitioning/dist_lookup.py
+import copy
 import logging
 import os
 import numpy as np
 import pyarrow
 import torch
-import copy
+from gloo_wrapper import alltoallv_cpu
 from pyarrow import csv
-from gloo_wrapper import alltoallv_cpu
 from utils import map_partid_rank
 class DistLookupService:
-    '''
+    """
    This is an implementation of a Distributed Lookup Service to provide the following
    services to its users. Map 1) global node-ids to partition-ids, and 2) global node-ids
    to shuffle global node-ids (contiguous, within each node for a given node_type and across
@@ -44,7 +45,7 @@ class DistLookupService:
        integer indicating the rank of a given process
    world_size : integer
        integer indicating the total no. of processes
-    '''
+    """
    def __init__(self, input_dir, ntype_names, id_map, rank, world_size):
        assert os.path.isdir(input_dir)
@@ -60,19 +61,28 @@ class DistLookupService:
        # Iterate over the node types and extract the partition id mappings.
        for ntype in ntype_names:
-            filename = f'{ntype}.txt'
+            filename = f"{ntype}.txt"
-            logging.info(f'[Rank: {rank}] Reading file: {os.path.join(input_dir, filename)}')
+            logging.info(
+                f"[Rank: {rank}] Reading file: {os.path.join(input_dir, filename)}"
-            read_options=pyarrow.csv.ReadOptions(use_threads=True, block_size=4096, autogenerate_column_names=True)
+            )
-            parse_options=pyarrow.csv.ParseOptions(delimiter=' ')
+            read_options = pyarrow.csv.ReadOptions(
+                use_threads=True,
+                block_size=4096,
+                autogenerate_column_names=True,
+            )
+            parse_options = pyarrow.csv.ParseOptions(delimiter=" ")
            ntype_partids = []
-            with pyarrow.csv.open_csv(os.path.join(input_dir, '{}.txt'.format(ntype)),
+            with pyarrow.csv.open_csv(
-                    read_options=read_options, parse_options=parse_options) as reader:
+                os.path.join(input_dir, "{}.txt".format(ntype)),
+                read_options=read_options,
+                parse_options=parse_options,
+            ) as reader:
                for next_chunk in reader:
                    if next_chunk is None:
                        break
                    next_table = pyarrow.Table.from_batches([next_chunk])
-                    ntype_partids.append(next_table['f0'].to_numpy())
+                    ntype_partids.append(next_table["f0"].to_numpy())
            ntype_partids = np.concatenate(ntype_partids)
            count = len(ntype_partids)
@@ -80,9 +90,12 @@ class DistLookupService:
            # Each rank assumes a contiguous set of partition-ids which are equally split
            # across all the processes.
-            split_size = np.ceil(count/np.int64(world_size)).astype(np.int64)
+            split_size = np.ceil(count / np.int64(world_size)).astype(np.int64)
-            start, end = np.int64(rank)*split_size, np.int64(rank+1)*split_size
+            start, end = (
-            if rank == (world_size-1):
+                np.int64(rank) * split_size,
+                np.int64(rank + 1) * split_size,
+            )
+            if rank == (world_size - 1):
                end = count
            type_nid_begin.append(start)
            type_nid_end.append(end)
@@ -102,9 +115,8 @@ class DistLookupService:
        self.rank = rank
        self.world_size = world_size
    def get_partition_ids(self, global_nids):
-        '''
+        """
        This function is used to get the partition-ids for a given set of global node ids
        global_nids <-> partition-ids mappings are deterministically  distributed across
@@ -134,7 +146,7 @@ class DistLookupService:
        list of integers :
            list of integers, which are the partition-ids of the global-node-ids (which is the
            function argument)
-        '''
+        """
        # Find the process where global_nid --> partition-id(owner) is stored.
        ntype_ids, type_nids = self.id_map(global_nids)
@@ -146,8 +158,10 @@ class DistLookupService:
        # The no. of these mappings stored by each process, in the lookup service, are
        # equally split among all the processes in the lookup service, deterministically.
        typeid_counts = self.ntype_count[ntype_ids]
-        chunk_sizes = np.ceil(typeid_counts/self.world_size).astype(np.int64)
+        chunk_sizes = np.ceil(typeid_counts / self.world_size).astype(np.int64)
-        service_owners = np.floor_divide(type_nids, chunk_sizes).astype(np.int64)
+        service_owners = np.floor_divide(type_nids, chunk_sizes).astype(
+            np.int64
+        )
        # Now `service_owners` is a list of ranks (process-ids) which own the corresponding
        # global-nid <-> partition-id mapping.
@@ -164,7 +178,9 @@ class DistLookupService:
            send_list.append(torch.from_numpy(ll))
            indices_list.append(idxes[0])
        assert len(np.concatenate(indices_list)) == len(global_nids)
-        assert np.all(np.sort(np.concatenate(indices_list)) == np.arange(len(global_nids)))
+        assert np.all(
+            np.sort(np.concatenate(indices_list)) == np.arange(len(global_nids))
+        )
        # Send the request to everyone else.
        # As a result of this operation, the current process also receives a list of lists
@@ -201,7 +217,10 @@ class DistLookupService:
                local_type_nids = global_type_nids - self.type_nid_begin[tid]
                assert np.all(local_type_nids >= 0)
-                assert np.all(local_type_nids <= (self.type_nid_end[tid] + 1 - self.type_nid_begin[tid]))
+                assert np.all(
+                    local_type_nids
+                    <= (self.type_nid_end[tid] + 1 - self.type_nid_begin[tid])
+                )
                cur_owners = self.partid_list[tid][local_type_nids]
                type_id_lookups.append(cur_owners)
@@ -226,7 +245,9 @@ class DistLookupService:
        # Order according to the requesting order.
        # Owner_resp_list is the list of owner-ids for global_nids (function argument).
-        owner_ids = torch.cat([x for x in owner_resp_list if x is not None]).numpy()
+        owner_ids = torch.cat(
+            [x for x in owner_resp_list if x is not None]
+        ).numpy()
        assert len(owner_ids) == len(global_nids)
        global_nids_order = np.concatenate(indices_list)
@@ -238,8 +259,10 @@ class DistLookupService:
        # Now return the owner_ids (partition-ids) which correspond to the global_nids.
        return owner_ids
-    def get_shuffle_nids(self, global_nids, my_global_nids, my_shuffle_global_nids, world_size):
+    def get_shuffle_nids(
-        '''
+        self, global_nids, my_global_nids, my_shuffle_global_nids, world_size
+    ):
+        """
        This function is used to retrieve shuffle_global_nids for a given set of incoming
        global_nids. Note that global_nids are of random order and will contain duplicates
@@ -276,7 +299,7 @@ class DistLookupService:
        list of integers:
            list of shuffle_global_nids which correspond to the incoming node-ids in the
            global_nids.
-        '''
+        """
        # Get the owner_ids (partition-ids or rank).
        owner_ids = self.get_partition_ids(global_nids)
@@ -318,8 +341,15 @@ class DistLookupService:
                shuffle_nids_list.append(torch.empty((0,), dtype=torch.int64))
                continue
-            uniq_ids, inverse_idx = np.unique(cur_global_nids[idx], return_inverse=True)
+            uniq_ids, inverse_idx = np.unique(
-            common, idx1, idx2 = np.intersect1d(uniq_ids, my_global_nids, assume_unique=True, return_indices=True)
+                cur_global_nids[idx], return_inverse=True
+            )
+            common, idx1, idx2 = np.intersect1d(
+                uniq_ids,
+                my_global_nids,
+                assume_unique=True,
+                return_indices=True,
+            )
            assert len(common) == len(uniq_ids)
            req_shuffle_global_nids = my_shuffle_global_nids[idx2][inverse_idx]
@@ -327,7 +357,9 @@ class DistLookupService:
            shuffle_nids_list.append(torch.from_numpy(req_shuffle_global_nids))
        # Send the shuffle-global-nids to their respective ranks.
-        mapped_global_nids = alltoallv_cpu(self.rank, self.world_size, shuffle_nids_list)
+        mapped_global_nids = alltoallv_cpu(
+            self.rank, self.world_size, shuffle_nids_list
+        )
        for idx in range(len(mapped_global_nids)):
            if mapped_global_nids[idx] == None:
                mapped_global_nids[idx] = torch.empty((0,), dtype=torch.int64)
@@ -338,7 +370,7 @@ class DistLookupService:
        assert len(shuffle_global_nids) == len(global_nids)
        sorted_idx = np.argsort(global_nids_order)
-        shuffle_global_nids = shuffle_global_nids[ sorted_idx ]
+        shuffle_global_nids = shuffle_global_nids[sorted_idx]
        global_nids_ordered = global_nids_order[sorted_idx]
        assert np.all(global_nids_ordered == np.arange(len(global_nids)))

--- a/tools/distpartitioning/globalids.py
+++ b/tools/distpartitioning/globalids.py
 import itertools
 import operator
+import constants
 import numpy as np
 import torch
-import constants
 from dist_lookup import DistLookupService
 from gloo_wrapper import allgather_sizes, alltoallv_cpu
 from utils import memory_snapshot
@@ -33,34 +33,49 @@ def get_shuffle_global_nids(rank, world_size, global_nids_ranks, node_data):
        where the column-0 are global_nids and column-1 are shuffle_global_nids which are retrieved
        from other processes.
    """
-    #build a list of sizes (lengths of lists)
+    # build a list of sizes (lengths of lists)
    global_nids_ranks = [torch.from_numpy(x) for x in global_nids_ranks]
    recv_nodes = alltoallv_cpu(rank, world_size, global_nids_ranks)
    # Use node_data to lookup global id to send over.
    send_nodes = []
    for proc_i_nodes in recv_nodes:
-        #list of node-ids to lookup
+        # list of node-ids to lookup
        if proc_i_nodes is not None:
            global_nids = proc_i_nodes.numpy()
-            if(len(global_nids) != 0):
+            if len(global_nids) != 0:
-                common, ind1, ind2 = np.intersect1d(node_data[constants.GLOBAL_NID], global_nids, return_indices=True)
+                common, ind1, ind2 = np.intersect1d(
-                shuffle_global_nids = node_data[constants.SHUFFLE_GLOBAL_NID][ind1]
+                    node_data[constants.GLOBAL_NID],
-                send_nodes.append(torch.from_numpy(shuffle_global_nids).type(dtype=torch.int64))
+                    global_nids,
+                    return_indices=True,
+                )
+                shuffle_global_nids = node_data[constants.SHUFFLE_GLOBAL_NID][
+                    ind1
+                ]
+                send_nodes.append(
+                    torch.from_numpy(shuffle_global_nids).type(
+                        dtype=torch.int64
+                    )
+                )
            else:
                send_nodes.append(torch.empty((0), dtype=torch.int64))
        else:
            send_nodes.append(torch.empty((0), dtype=torch.int64))
-    #send receive global-ids
+    # send receive global-ids
    recv_shuffle_global_nids = alltoallv_cpu(rank, world_size, send_nodes)
-    shuffle_global_nids = np.concatenate([x.numpy() if x is not None else [] for x in recv_shuffle_global_nids])
+    shuffle_global_nids = np.concatenate(
+        [x.numpy() if x is not None else [] for x in recv_shuffle_global_nids]
+    )
    global_nids = np.concatenate([x for x in global_nids_ranks])
    ret_val = np.column_stack([global_nids, shuffle_global_nids])
    return ret_val
-def lookup_shuffle_global_nids_edges(rank, world_size, num_parts, edge_data, id_lookup, node_data):
-    '''
+def lookup_shuffle_global_nids_edges(
+    rank, world_size, num_parts, edge_data, id_lookup, node_data
+):
+    """
    This function is a helper function used to lookup shuffle-global-nids for a given set of
    global-nids using a distributed lookup service.
@@ -87,7 +102,7 @@ def lookup_shuffle_global_nids_edges(rank, world_size, num_parts, edge_data, id_
    dictionary :
        dictionary where keys are column names and values are numpy arrays representing all the
        edges present in the current graph partition
-    '''
+    """
    # Make sure that the outgoing message size does not exceed 2GB in size.
    # Even though gloo can handle up to 10GB of data in the outgoing messages,
    # it needs additional memory to store temporary information into the buffers which will increase
@@ -98,18 +113,26 @@ def lookup_shuffle_global_nids_edges(rank, world_size, num_parts, edge_data, id_
    local_nids = []
    local_shuffle_nids = []
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
-        local_nids.append(node_data[constants.GLOBAL_NID+"/"+str(local_part_id)])
+        local_nids.append(
-        local_shuffle_nids.append(node_data[constants.SHUFFLE_GLOBAL_NID+"/"+str(local_part_id)])
+            node_data[constants.GLOBAL_NID + "/" + str(local_part_id)]
+        )
+        local_shuffle_nids.append(
+            node_data[constants.SHUFFLE_GLOBAL_NID + "/" + str(local_part_id)]
+        )
    local_nids = np.concatenate(local_nids)
    local_shuffle_nids = np.concatenate(local_shuffle_nids)
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
-        node_list = edge_data[constants.GLOBAL_SRC_ID+"/"+str(local_part_id)]  
+        node_list = edge_data[
+            constants.GLOBAL_SRC_ID + "/" + str(local_part_id)
+        ]
        # Determine the no. of times each process has to send alltoall messages.
-        all_sizes = allgather_sizes([node_list.shape[0]], world_size, num_parts, return_sizes=True)
+        all_sizes = allgather_sizes(
+            [node_list.shape[0]], world_size, num_parts, return_sizes=True
+        )
        max_count = np.amax(all_sizes)
        num_splits = max_count // BATCH_SIZE + 1
@@ -117,26 +140,49 @@ def lookup_shuffle_global_nids_edges(rank, world_size, num_parts, edge_data, id_
        splits = np.array_split(node_list, num_splits)
        shuffle_mappings = []
        for item in splits:
-            shuffle_ids = id_lookup.get_shuffle_nids(item, local_nids, local_shuffle_nids, world_size)
+            shuffle_ids = id_lookup.get_shuffle_nids(
+                item, local_nids, local_shuffle_nids, world_size
+            )
            shuffle_mappings.append(shuffle_ids)
        shuffle_ids = np.concatenate(shuffle_mappings)
        assert shuffle_ids.shape[0] == node_list.shape[0]
-        edge_data[constants.SHUFFLE_GLOBAL_SRC_ID+"/"+str(local_part_id)] = shuffle_ids
+        edge_data[
+            constants.SHUFFLE_GLOBAL_SRC_ID + "/" + str(local_part_id)
+        ] = shuffle_ids
        # Destination end points of edges are owned by the current node and therefore
        # should have corresponding SHUFFLE_GLOBAL_NODE_IDs.
        # Here retrieve SHUFFLE_GLOBAL_NODE_IDs for the destination end points of local edges.
-        uniq_ids, inverse_idx = np.unique(edge_data[constants.GLOBAL_DST_ID+"/"+str(local_part_id)], return_inverse=True)
+        uniq_ids, inverse_idx = np.unique(
-        common, idx1, idx2 = np.intersect1d(uniq_ids, node_data[constants.GLOBAL_NID+"/"+str(local_part_id)], assume_unique=True, return_indices=True)
+            edge_data[constants.GLOBAL_DST_ID + "/" + str(local_part_id)],
+            return_inverse=True,
+        )
+        common, idx1, idx2 = np.intersect1d(
+            uniq_ids,
+            node_data[constants.GLOBAL_NID + "/" + str(local_part_id)],
+            assume_unique=True,
+            return_indices=True,
+        )
        assert len(common) == len(uniq_ids)
-        edge_data[constants.SHUFFLE_GLOBAL_DST_ID+"/"+str(local_part_id)] = node_data[constants.SHUFFLE_GLOBAL_NID+"/"+str(local_part_id)][idx2][inverse_idx]
+        edge_data[
-        assert len(edge_data[constants.SHUFFLE_GLOBAL_DST_ID+"/"+str(local_part_id)]) == len(edge_data[constants.GLOBAL_DST_ID+"/"+str(local_part_id)])
+            constants.SHUFFLE_GLOBAL_DST_ID + "/" + str(local_part_id)
+        ] = node_data[constants.SHUFFLE_GLOBAL_NID + "/" + str(local_part_id)][
+            idx2
+        ][
+            inverse_idx
+        ]
+        assert len(
+            edge_data[
+                constants.SHUFFLE_GLOBAL_DST_ID + "/" + str(local_part_id)
+            ]
+        ) == len(edge_data[constants.GLOBAL_DST_ID + "/" + str(local_part_id)])
    memory_snapshot("GlobalToShuffleIDMap_AfterLookupServiceCalls: ", rank)
    return edge_data
 def assign_shuffle_global_nids_nodes(rank, world_size, num_parts, node_data):
    """
    Utility function to assign shuffle global ids to nodes at a given rank
@@ -162,17 +208,27 @@ def assign_shuffle_global_nids_nodes(rank, world_size, num_parts, node_data):
    """
    # Compute prefix sum to determine node-id offsets
    local_row_counts = []
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
-        local_row_counts.append(node_data[constants.GLOBAL_NID+"/"+str(local_part_id)].shape[0])
+        local_row_counts.append(
+            node_data[constants.GLOBAL_NID + "/" + str(local_part_id)].shape[0]
+        )
    # Perform allgather to compute the local offsets.
    prefix_sum_nodes = allgather_sizes(local_row_counts, world_size, num_parts)
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
-        shuffle_global_nid_start = prefix_sum_nodes[rank + (local_part_id*world_size)]
+        shuffle_global_nid_start = prefix_sum_nodes[
-        shuffle_global_nid_end = prefix_sum_nodes[rank + 1 + (local_part_id*world_size)]
+            rank + (local_part_id * world_size)
-        shuffle_global_nids = np.arange(shuffle_global_nid_start, shuffle_global_nid_end, dtype=np.int64)
+        ]
-        node_data[constants.SHUFFLE_GLOBAL_NID+"/"+str(local_part_id)] = shuffle_global_nids
+        shuffle_global_nid_end = prefix_sum_nodes[
+            rank + 1 + (local_part_id * world_size)
+        ]
+        shuffle_global_nids = np.arange(
+            shuffle_global_nid_start, shuffle_global_nid_end, dtype=np.int64
+        )
+        node_data[
+            constants.SHUFFLE_GLOBAL_NID + "/" + str(local_part_id)
+        ] = shuffle_global_nids
 def assign_shuffle_global_nids_edges(rank, world_size, num_parts, edge_data):
@@ -198,19 +254,31 @@ def assign_shuffle_global_nids_edges(rank, world_size, num_parts, edge_data):
        shuffle_global_eid_start, which indicates the starting value from which shuffle_global-ids are assigned to edges
        on this rank
    """
-    #get prefix sum of edge counts per rank to locate the starting point
+    # get prefix sum of edge counts per rank to locate the starting point
-    #from which global-ids to edges are assigned in the current rank
+    # from which global-ids to edges are assigned in the current rank
    local_row_counts = []
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
-        local_row_counts.append(edge_data[constants.GLOBAL_SRC_ID+"/"+str(local_part_id)].shape[0])
+        local_row_counts.append(
+            edge_data[constants.GLOBAL_SRC_ID + "/" + str(local_part_id)].shape[
+                0
+            ]
+        )
    shuffle_global_eid_offset = []
    prefix_sum_edges = allgather_sizes(local_row_counts, world_size, num_parts)
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
-        shuffle_global_eid_start = prefix_sum_edges[rank + (local_part_id*world_size)]
+        shuffle_global_eid_start = prefix_sum_edges[
-        shuffle_global_eid_end = prefix_sum_edges[rank + 1 + (local_part_id*world_size)]
+            rank + (local_part_id * world_size)
-        shuffle_global_eids = np.arange(shuffle_global_eid_start, shuffle_global_eid_end, dtype=np.int64)
+        ]
-        edge_data[constants.SHUFFLE_GLOBAL_EID+"/"+str(local_part_id)] = shuffle_global_eids
+        shuffle_global_eid_end = prefix_sum_edges[
+            rank + 1 + (local_part_id * world_size)
+        ]
+        shuffle_global_eids = np.arange(
+            shuffle_global_eid_start, shuffle_global_eid_end, dtype=np.int64
+        )
+        edge_data[
+            constants.SHUFFLE_GLOBAL_EID + "/" + str(local_part_id)
+        ] = shuffle_global_eids
        shuffle_global_eid_offset.append(shuffle_global_eid_start)
    return shuffle_global_eid_offset
--- a/tools/distpartitioning/gloo_wrapper.py
+++ b/tools/distpartitioning/gloo_wrapper.py
@@ -2,6 +2,7 @@ import numpy as np
 import torch
 import torch.distributed as dist
 def allgather_sizes(send_data, world_size, num_parts, return_sizes=False):
    """
    Perform all gather on list lengths, used to compute prefix sums
@@ -29,33 +30,35 @@ def allgather_sizes(send_data, world_size, num_parts, return_sizes=False):
    # Assert on the world_size, num_parts
    assert (num_parts % world_size) == 0
-    #compute the length of the local data
+    # compute the length of the local data
    send_length = len(send_data)
    out_tensor = torch.as_tensor(send_data, dtype=torch.int64)
-    in_tensor = [torch.zeros(send_length, dtype=torch.int64) 
+    in_tensor = [
-                    for _ in range(world_size)]
+        torch.zeros(send_length, dtype=torch.int64) for _ in range(world_size)
+    ]
-    #all_gather message
+    # all_gather message
    dist.all_gather(in_tensor, out_tensor)
    # Return only the raw sizes from each process
    if return_sizes:
        return torch.cat(in_tensor).numpy()
-    #gather sizes in on array to return to the invoking function
+    # gather sizes in on array to return to the invoking function
    rank_sizes = np.zeros(num_parts + 1, dtype=np.int64)
    part_counts = torch.cat(in_tensor).numpy()
    count = rank_sizes[0]
    idx = 1
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
        for r in range(world_size):
-            count += part_counts[r*(num_parts//world_size) + local_part_id]
+            count += part_counts[r * (num_parts // world_size) + local_part_id]
            rank_sizes[idx] = count
            idx += 1
    return rank_sizes
 def __alltoall_cpu(rank, world_size, output_tensor_list, input_tensor_list):
    """
    Each process scatters list of input tensors to all processes in a cluster
@@ -72,23 +75,28 @@ def __alltoall_cpu(rank, world_size, output_tensor_list, input_tensor_list):
    input_tensor_list : List of tensor
        The tensors to exchange
    """
-    input_tensor_list = [tensor.to(torch.device('cpu')) for tensor in input_tensor_list]
+    input_tensor_list = [
+        tensor.to(torch.device("cpu")) for tensor in input_tensor_list
+    ]
    # TODO(#5002): As Boolean data is not supported in
    # ``torch.distributed.scatter()``, we convert boolean into int8 before
    # scatter and convert it back afterwards.
-    dtypes = [ t.dtype for t in input_tensor_list]
+    dtypes = [t.dtype for t in input_tensor_list]
    for i, dtype in enumerate(dtypes):
        if dtype == torch.bool:
            input_tensor_list[i] = input_tensor_list[i].to(torch.int8)
            output_tensor_list[i] = output_tensor_list[i].to(torch.int8)
    for i in range(world_size):
-        dist.scatter(output_tensor_list[i], input_tensor_list if i == rank else [], src=i)
+        dist.scatter(
+            output_tensor_list[i], input_tensor_list if i == rank else [], src=i
+        )
    # Convert back to original dtype
    for i, dtype in enumerate(dtypes):
        if dtype == torch.bool:
            input_tensor_list[i] = input_tensor_list[i].to(dtype)
            output_tensor_list[i] = output_tensor_list[i].to(dtype)
 def alltoallv_cpu(rank, world_size, input_tensor_list, retain_nones=True):
    """
    Wrapper function providing the alltoallv functionality by using the underlying alltoall
@@ -120,58 +128,76 @@ def alltoallv_cpu(rank, world_size, input_tensor_list, retain_nones=True):
        list of tensors received from other processes during alltoall message
    """
-    #ensure len of input_tensor_list is same as the world_size.
+    # ensure len of input_tensor_list is same as the world_size.
    assert input_tensor_list != None
    assert len(input_tensor_list) == world_size
-    #ensure that all the tensors in the input_tensor_list are of same size.
+    # ensure that all the tensors in the input_tensor_list are of same size.
    sizes = [list(x.size()) for x in input_tensor_list]
-    for idx in range(1,len(sizes)):
+    for idx in range(1, len(sizes)):
-        assert len(sizes[idx-1]) == len(sizes[idx]) #no. of dimensions should be same
+        assert len(sizes[idx - 1]) == len(
-        assert input_tensor_list[idx-1].dtype == input_tensor_list[idx].dtype # dtype should be same 
+            sizes[idx]
-        assert sizes[idx-1][1:] == sizes[idx][1:] #except first dimension remaining dimensions should all be the same
+        )  # no. of dimensions should be same
+        assert (
-    #decide how much to pad. 
+            input_tensor_list[idx - 1].dtype == input_tensor_list[idx].dtype
-    #always use the first-dimension for padding. 
+        )  # dtype should be same
-    ll = [ x[0] for x in sizes ]
+        assert (
+            sizes[idx - 1][1:] == sizes[idx][1:]
-    #dims of the padding needed, if any
+        )  # except first dimension remaining dimensions should all be the same
-    #these dims are used for padding purposes.
-    diff_dims = [ [np.amax(ll) - l[0]] + l[1:] for l in sizes ]
+    # decide how much to pad.
+    # always use the first-dimension for padding.
-    #pad the actual message
+    ll = [x[0] for x in sizes]
-    input_tensor_list = [torch.cat((x, torch.zeros(diff_dims[idx]).type(x.dtype))) for idx, x in enumerate(input_tensor_list)]
+    # dims of the padding needed, if any
-    #send useful message sizes to all
+    # these dims are used for padding purposes.
+    diff_dims = [[np.amax(ll) - l[0]] + l[1:] for l in sizes]
+    # pad the actual message
+    input_tensor_list = [
+        torch.cat((x, torch.zeros(diff_dims[idx]).type(x.dtype)))
+        for idx, x in enumerate(input_tensor_list)
+    ]
+    # send useful message sizes to all
    send_counts = []
    recv_counts = []
    for idx in range(world_size):
-        #send a vector, of atleast 3 elements, [a, b, ....] where 
+        # send a vector, of atleast 3 elements, [a, b, ....] where
-        #a = useful message dim, b = actual message outgoing message size along the first dimension
+        # a = useful message dim, b = actual message outgoing message size along the first dimension
-        #and remaining elements are the remaining dimensions of the tensor
+        # and remaining elements are the remaining dimensions of the tensor
-        send_counts.append(torch.from_numpy(np.array([sizes[idx][0]] + [np.amax(ll)] + sizes[idx][1:] )).type(torch.int64))
+        send_counts.append(
-        recv_counts.append(torch.zeros((1 + len(sizes[idx])), dtype=torch.int64))
+            torch.from_numpy(
+                np.array([sizes[idx][0]] + [np.amax(ll)] + sizes[idx][1:])
+            ).type(torch.int64)
+        )
+        recv_counts.append(
+            torch.zeros((1 + len(sizes[idx])), dtype=torch.int64)
+        )
    __alltoall_cpu(rank, world_size, recv_counts, send_counts)
-    #allocate buffers for receiving message
+    # allocate buffers for receiving message
    output_tensor_list = []
-    recv_counts = [ tsize.numpy() for tsize in recv_counts]
+    recv_counts = [tsize.numpy() for tsize in recv_counts]
    for idx, tsize in enumerate(recv_counts):
-        output_tensor_list.append(torch.zeros(tuple(tsize[1:])).type(input_tensor_list[idx].dtype))
+        output_tensor_list.append(
+            torch.zeros(tuple(tsize[1:])).type(input_tensor_list[idx].dtype)
+        )
-    #send actual message itself. 
+    # send actual message itself.
    __alltoall_cpu(rank, world_size, output_tensor_list, input_tensor_list)
-    #extract un-padded message from the output_tensor_list and return it
+    # extract un-padded message from the output_tensor_list and return it
    return_vals = []
    for s, t in zip(recv_counts, output_tensor_list):
        if s[0] == 0:
            if retain_nones:
                return_vals.append(None)
        else:
-            return_vals.append(t[0:s[0]])
+            return_vals.append(t[0 : s[0]])
    return return_vals
 def gather_metadata_json(metadata, rank, world_size):
    """
    Gather an object (json schema on `rank`)
@@ -187,10 +213,10 @@ def gather_metadata_json(metadata, rank, world_size):
        objects from each rank in the world
    """
-    #Populate input obj and output obj list on rank-0 and non-rank-0 machines
+    # Populate input obj and output obj list on rank-0 and non-rank-0 machines
    input_obj = None if rank == 0 else metadata
    output_objs = [None for _ in range(world_size)] if rank == 0 else None
-    #invoke the gloo method to perform gather on rank-0
+    # invoke the gloo method to perform gather on rank-0
    dist.gather_object(input_obj, output_objs, dst=0)
    return output_objs
--- a/tools/distpartitioning/parmetis_postprocess.py
+++ b/tools/distpartitioning/parmetis_postprocess.py
@@ -5,13 +5,13 @@ import platform
 import sys
 from pathlib import Path
+import constants
 import numpy as np
 import pyarrow
 import pyarrow.csv as csv
+from partition_algo.base import dump_partition_meta, PartitionMeta
-import constants
 from utils import get_idranges, get_node_types, read_json
-from partition_algo.base import PartitionMeta, dump_partition_meta
 def post_process(params):

--- a/tools/distpartitioning/parmetis_preprocess.py
+++ b/tools/distpartitioning/parmetis_preprocess.py
@@ -4,14 +4,14 @@ import os
 import sys
 from pathlib import Path
+import constants
 import numpy as np
 import pyarrow
 import pyarrow.csv as csv
 import pyarrow.parquet as pq
 import torch
 import torch.distributed as dist
-import constants
 from utils import get_idranges, get_node_types, read_json
 import array_readwriter
@@ -33,12 +33,13 @@ def get_proc_info():
    # mpich
    if "PMI_RANK" in env_variables:
        return int(env_variables["PMI_RANK"])
-    #openmpi
+    # openmpi
    elif "OMPI_COMM_WORLD_RANK" in env_variables:
        return int(env_variables["OMPI_COMM_WORLD_RANK"])
    else:
        return 0
 def gen_edge_files(schema_map, output):
    """Function to create edges files to be consumed by ParMETIS
    for partitioning purposes.
@@ -106,12 +107,16 @@ def gen_edge_files(schema_map, output):
            options = csv.WriteOptions(include_header=False, delimiter=" ")
            options.delimiter = " "
            csv.write_csv(
-                pyarrow.Table.from_arrays(cols, names=col_names), out_file, options
+                pyarrow.Table.from_arrays(cols, names=col_names),
+                out_file,
+                options,
            )
            return out_file
        if edges_format == constants.STR_CSV:
-            delimiter = etype_info[constants.STR_FORMAT][constants.STR_FORMAT_DELIMITER]
+            delimiter = etype_info[constants.STR_FORMAT][
+                constants.STR_FORMAT_DELIMITER
+            ]
            data_df = csv.read_csv(
                edge_data_files[rank],
                read_options=pyarrow.csv.ReadOptions(
@@ -314,11 +319,17 @@ def gen_parmetis_input_args(params, schema_map):
    #   num_constraints = no. of node types + train_mask + test_mask + val_mask
    #   Here, (train/test/val) masks will be set to 1 if these masks exist for
    #   all the node types in the graph, otherwise these flags will be set to 0
-    assert constants.STR_GRAPH_NAME in schema_map, "Graph name is not present in the json file"
+    assert (
+        constants.STR_GRAPH_NAME in schema_map
+    ), "Graph name is not present in the json file"
    graph_name = schema_map[constants.STR_GRAPH_NAME]
-    if not os.path.isfile(f'{graph_name}_stats.txt'):
+    if not os.path.isfile(f"{graph_name}_stats.txt"):
-        num_nodes = np.sum(np.concatenate(schema_map[constants.STR_NUM_NODES_PER_CHUNK]))
+        num_nodes = np.sum(
-        num_edges = np.sum(np.concatenate(schema_map[constants.STR_NUM_EDGES_PER_CHUNK]))
+            np.concatenate(schema_map[constants.STR_NUM_NODES_PER_CHUNK])
+        )
+        num_edges = np.sum(
+            np.concatenate(schema_map[constants.STR_NUM_EDGES_PER_CHUNK])
+        )
        num_ntypes = len(schema_map[constants.STR_NODE_TYPE])
        train_mask = test_mask = val_mask = 0
@@ -335,8 +346,8 @@ def gen_parmetis_input_args(params, schema_map):
        val_mask = val_mask // num_ntypes
        num_constraints = num_ntypes + train_mask + test_mask + val_mask
-        with open(f'{graph_name}_stats.txt', 'w') as sf:
+        with open(f"{graph_name}_stats.txt", "w") as sf:
-            sf.write(f'{num_nodes} {num_edges} {num_constraints}')
+            sf.write(f"{num_nodes} {num_edges} {num_constraints}")
    node_files = []
    outdir = Path(params.output_dir)

--- a/tools/distpartitioning/utils.py
+++ b/tools/distpartitioning/utils.py
@@ -3,13 +3,13 @@ import logging
 import os
 import constants
+import dgl
 import numpy as np
 import psutil
 import pyarrow
-from pyarrow import csv
-import dgl
 from dgl.distributed.partition import _dump_part_config
+from pyarrow import csv
 def read_ntype_partition_files(schema_map, input_dir):
@@ -38,17 +38,22 @@ def read_ntype_partition_files(schema_map, input_dir):
    """
    assert os.path.isdir(input_dir)
-    #iterate over the node types and extract the partition id mappings
+    # iterate over the node types and extract the partition id mappings
    part_ids = []
    ntype_names = schema_map[constants.STR_NODE_TYPE]
    for ntype in ntype_names:
-        df = csv.read_csv(os.path.join(input_dir, '{}.txt'.format(ntype)), \
+        df = csv.read_csv(
-                read_options=pyarrow.csv.ReadOptions(autogenerate_column_names=True), \
+            os.path.join(input_dir, "{}.txt".format(ntype)),
-                parse_options=pyarrow.csv.ParseOptions(delimiter=' '))
+            read_options=pyarrow.csv.ReadOptions(
-        ntype_partids = df['f0'].to_numpy()
+                autogenerate_column_names=True
+            ),
+            parse_options=pyarrow.csv.ParseOptions(delimiter=" "),
+        )
+        ntype_partids = df["f0"].to_numpy()
        part_ids.append(ntype_partids)
    return np.concatenate(part_ids)
 def read_json(json_file):
    """
    Utility method to read a json file schema
@@ -67,6 +72,7 @@ def read_json(json_file):
    return val
 def get_etype_featnames(etype_name, schema_map):
    """Retrieves edge feature names for a given edge_type
@@ -88,6 +94,7 @@ def get_etype_featnames(etype_name, schema_map):
    feats = edge_data.get(etype_name, {})
    return [feat for feat in feats]
 def get_ntype_featnames(ntype_name, schema_map):
    """
    Retrieves node feature names for a given node_type
@@ -110,6 +117,7 @@ def get_ntype_featnames(ntype_name, schema_map):
    feats = node_data.get(ntype_name, {})
    return [feat for feat in feats]
 def get_edge_types(schema_map):
    """Utility method to extract edge_typename -> edge_type mappings
    as defined by the input schema
@@ -130,10 +138,11 @@ def get_edge_types(schema_map):
        with keys as etype ids (integers) and values as edge type names
    """
    etypes = schema_map[constants.STR_EDGE_TYPE]
-    etype_etypeid_map = {e : i for i, e in enumerate(etypes)}
+    etype_etypeid_map = {e: i for i, e in enumerate(etypes)}
-    etypeid_etype_map = {i : e for i, e in enumerate(etypes)}
+    etypeid_etype_map = {i: e for i, e in enumerate(etypes)}
    return etype_etypeid_map, etypes, etypeid_etype_map
 def get_node_types(schema_map):
    """
    Utility method to extract node_typename -> node_type mappings
@@ -155,10 +164,11 @@ def get_node_types(schema_map):
        with keys as ntype ids (integers) and values as node type names
    """
    ntypes = schema_map[constants.STR_NODE_TYPE]
-    ntype_ntypeid_map = {e : i for i, e in enumerate(ntypes)}
+    ntype_ntypeid_map = {e: i for i, e in enumerate(ntypes)}
-    ntypeid_ntype_map = {i : e for i, e in enumerate(ntypes)}
+    ntypeid_ntype_map = {i: e for i, e in enumerate(ntypes)}
    return ntype_ntypeid_map, ntypes, ntypeid_ntype_map
 def get_gnid_range_map(node_tids):
    """
    Retrieves auxiliary dictionaries from the metadata json object
@@ -186,7 +196,10 @@ def get_gnid_range_map(node_tids):
    return ntypes_gid_range
-def write_metadata_json(input_list, output_dir, graph_name, world_size, num_parts):
+def write_metadata_json(
+    input_list, output_dir, graph_name, world_size, num_parts
+):
    """
    Merge json schema's from each of the rank's on rank-0.
    This utility function, to be used on rank-0, to create aggregated json file.
@@ -204,47 +217,70 @@ def write_metadata_json(input_list, output_dir, graph_name, world_size, num_part
    # each dictionary will contain num_parts/world_size metadata json
    # which correspond to local partitions on the respective ranks.
    metadata_list = []
-    for local_part_id in range(num_parts//world_size):
+    for local_part_id in range(num_parts // world_size):
        for idx in range(world_size):
-            metadata_list.append(input_list[idx]["local-part-id-"+str(local_part_id*world_size + idx)])
+            metadata_list.append(
+                input_list[idx][
+                    "local-part-id-" + str(local_part_id * world_size + idx)
+                ]
+            )
-    #Initialize global metadata
+    # Initialize global metadata
    graph_metadata = {}
-    #Merge global_edge_ids from each json object in the input list
+    # Merge global_edge_ids from each json object in the input list
    edge_map = {}
    x = metadata_list[0]["edge_map"]
    for k in x:
        edge_map[k] = []
        for idx in range(len(metadata_list)):
-            edge_map[k].append([int(metadata_list[idx]["edge_map"][k][0][0]),int(metadata_list[idx]["edge_map"][k][0][1])])
+            edge_map[k].append(
+                [
+                    int(metadata_list[idx]["edge_map"][k][0][0]),
+                    int(metadata_list[idx]["edge_map"][k][0][1]),
+                ]
+            )
    graph_metadata["edge_map"] = edge_map
    graph_metadata["etypes"] = metadata_list[0]["etypes"]
    graph_metadata["graph_name"] = metadata_list[0]["graph_name"]
    graph_metadata["halo_hops"] = metadata_list[0]["halo_hops"]
-    #Merge global_nodeids from each of json object in the input list
+    # Merge global_nodeids from each of json object in the input list
    node_map = {}
    x = metadata_list[0]["node_map"]
    for k in x:
        node_map[k] = []
        for idx in range(len(metadata_list)):
-            node_map[k].append([int(metadata_list[idx]["node_map"][k][0][0]), int(metadata_list[idx]["node_map"][k][0][1])])
+            node_map[k].append(
+                [
+                    int(metadata_list[idx]["node_map"][k][0][0]),
+                    int(metadata_list[idx]["node_map"][k][0][1]),
+                ]
+            )
    graph_metadata["node_map"] = node_map
    graph_metadata["ntypes"] = metadata_list[0]["ntypes"]
-    graph_metadata["num_edges"] = int(sum([metadata_list[i]["num_edges"] for i in range(len(metadata_list))]))
+    graph_metadata["num_edges"] = int(
-    graph_metadata["num_nodes"] = int(sum([metadata_list[i]["num_nodes"] for i in range(len(metadata_list))]))
+        sum([metadata_list[i]["num_edges"] for i in range(len(metadata_list))])
+    )
+    graph_metadata["num_nodes"] = int(
+        sum([metadata_list[i]["num_nodes"] for i in range(len(metadata_list))])
+    )
    graph_metadata["num_parts"] = metadata_list[0]["num_parts"]
    graph_metadata["part_method"] = metadata_list[0]["part_method"]
    for i in range(len(metadata_list)):
-        graph_metadata["part-{}".format(i)] = metadata_list[i]["part-{}".format(i)]
+        graph_metadata["part-{}".format(i)] = metadata_list[i][
+            "part-{}".format(i)
+        ]
+    _dump_part_config(f"{output_dir}/metadata.json", graph_metadata)
-    _dump_part_config(f'{output_dir}/metadata.json', graph_metadata)
-def augment_edge_data(edge_data, lookup_service, edge_tids, rank, world_size, num_parts):
+def augment_edge_data(
+    edge_data, lookup_service, edge_tids, rank, world_size, num_parts
+):
    """
    Add partition-id (rank which owns an edge) column to the edge_data.
@@ -272,7 +308,7 @@ def augment_edge_data(edge_data, lookup_service, edge_tids, rank, world_size, nu
        loaded from input dataset files. In addition to this we include additional columns which
        aid this pipelines computation, like constants.OWNER_PROCESS
    """
-    #add global_nids to the node_data
+    # add global_nids to the node_data
    etype_offset = {}
    offset = 0
    for etype_name, tid_range in edge_tids.items():
@@ -293,10 +329,13 @@ def augment_edge_data(edge_data, lookup_service, edge_tids, rank, world_size, nu
    assert global_eids.shape[0] == edge_data[constants.ETYPE_ID].shape[0]
    edge_data[constants.GLOBAL_EID] = global_eids
-    #assign the owner process/rank for each edge 
+    # assign the owner process/rank for each edge
-    edge_data[constants.OWNER_PROCESS] = lookup_service.get_partition_ids(edge_data[constants.GLOBAL_DST_ID])
+    edge_data[constants.OWNER_PROCESS] = lookup_service.get_partition_ids(
+        edge_data[constants.GLOBAL_DST_ID]
+    )
    return edge_data
 def read_edges_file(edge_file, edge_data_dict):
    """
    Utility function to read xxx_edges.txt file
@@ -315,20 +354,24 @@ def read_edges_file(edge_file, edge_data_dict):
    if edge_file == "" or edge_file == None:
        return None
-    #Read the file from here.
+    # Read the file from here.
-    #<global_src_id> <global_dst_id> <type_eid> <etype> <attributes>
+    # <global_src_id> <global_dst_id> <type_eid> <etype> <attributes>
    # global_src_id -- global idx for the source node ... line # in the graph_nodes.txt
    # global_dst_id -- global idx for the destination id node ... line # in the graph_nodes.txt
-    edge_data_df = csv.read_csv(edge_file, read_options=pyarrow.csv.ReadOptions(autogenerate_column_names=True), 
+    edge_data_df = csv.read_csv(
-                                    parse_options=pyarrow.csv.ParseOptions(delimiter=' '))
+        edge_file,
+        read_options=pyarrow.csv.ReadOptions(autogenerate_column_names=True),
+        parse_options=pyarrow.csv.ParseOptions(delimiter=" "),
+    )
    edge_data_dict = {}
-    edge_data_dict[constants.GLOBAL_SRC_ID] = edge_data_df['f0'].to_numpy()
+    edge_data_dict[constants.GLOBAL_SRC_ID] = edge_data_df["f0"].to_numpy()
-    edge_data_dict[constants.GLOBAL_DST_ID] = edge_data_df['f1'].to_numpy()
+    edge_data_dict[constants.GLOBAL_DST_ID] = edge_data_df["f1"].to_numpy()
-    edge_data_dict[constants.GLOBAL_TYPE_EID] = edge_data_df['f2'].to_numpy()
+    edge_data_dict[constants.GLOBAL_TYPE_EID] = edge_data_df["f2"].to_numpy()
-    edge_data_dict[constants.ETYPE_ID] = edge_data_df['f3'].to_numpy()
+    edge_data_dict[constants.ETYPE_ID] = edge_data_df["f3"].to_numpy()
    return edge_data_dict
 def read_node_features_file(nodes_features_file):
    """
    Utility function to load tensors from a file
@@ -347,6 +390,7 @@ def read_node_features_file(nodes_features_file):
    node_features = dgl.data.utils.load_tensors(nodes_features_file, False)
    return node_features
 def read_edge_features_file(edge_features_file):
    """
    Utility function to load tensors from a file
@@ -364,6 +408,7 @@ def read_edge_features_file(edge_features_file):
    edge_features = dgl.data.utils.load_tensors(edge_features_file, True)
    return edge_features
 def write_node_features(node_features, node_file):
    """
    Utility function to serialize node_features in node_file file
@@ -377,6 +422,7 @@ def write_node_features(node_features, node_file):
    """
    dgl.data.utils.save_tensors(node_file, node_features)
 def write_edge_features(edge_features, edge_file):
    """
    Utility function to serialize edge_features in edge_file file
@@ -390,6 +436,7 @@ def write_edge_features(edge_features, edge_file):
    """
    dgl.data.utils.save_tensors(edge_file, edge_features)
 def write_graph_dgl(graph_file, graph_obj, formats, sort_etypes):
    """
    Utility function to serialize graph dgl objects
@@ -405,11 +452,22 @@ def write_graph_dgl(graph_file, graph_obj, formats, sort_etypes):
    sort_etypes : bool
        Whether to sort etypes in csc/csr.
    """
-    dgl.distributed.partition._save_graphs(graph_file, [graph_obj],
+    dgl.distributed.partition._save_graphs(
-        formats, sort_etypes)
+        graph_file, [graph_obj], formats, sort_etypes
+    )
-def write_dgl_objects(graph_obj, node_features, edge_features,
-        output_dir, part_id, orig_nids, orig_eids, formats, sort_etypes):
+def write_dgl_objects(
+    graph_obj,
+    node_features,
+    edge_features,
+    output_dir,
+    part_id,
+    orig_nids,
+    orig_eids,
+    formats,
+    sort_etypes,
+):
    """
    Wrapper function to write graph, node/edge feature, original node/edge IDs.
@@ -434,24 +492,30 @@ def write_dgl_objects(graph_obj, node_features, edge_features,
    sort_etypes : bool
        Whether to sort etypes in csc/csr.
    """
-    part_dir = output_dir + '/part' + str(part_id)
+    part_dir = output_dir + "/part" + str(part_id)
    os.makedirs(part_dir, exist_ok=True)
-    write_graph_dgl(os.path.join(part_dir ,'graph.dgl'), graph_obj,
+    write_graph_dgl(
-        formats, sort_etypes)
+        os.path.join(part_dir, "graph.dgl"), graph_obj, formats, sort_etypes
+    )
    if node_features != None:
-        write_node_features(node_features, os.path.join(part_dir, "node_feat.dgl"))
+        write_node_features(
+            node_features, os.path.join(part_dir, "node_feat.dgl")
+        )
-    if (edge_features != None):
+    if edge_features != None:
-        write_edge_features(edge_features, os.path.join(part_dir, "edge_feat.dgl"))
+        write_edge_features(
+            edge_features, os.path.join(part_dir, "edge_feat.dgl")
+        )
    if orig_nids is not None:
-        orig_nids_file = os.path.join(part_dir, 'orig_nids.dgl')
+        orig_nids_file = os.path.join(part_dir, "orig_nids.dgl")
        dgl.data.utils.save_tensors(orig_nids_file, orig_nids)
    if orig_eids is not None:
-        orig_eids_file = os.path.join(part_dir, 'orig_eids.dgl')
+        orig_eids_file = os.path.join(part_dir, "orig_eids.dgl")
        dgl.data.utils.save_tensors(orig_eids_file, orig_eids)
 def get_idranges(names, counts, num_chunks=None):
    """
    Utility function to compute typd_id/global_id ranges for both nodes and edges.
@@ -492,7 +556,7 @@ def get_idranges(names, counts, num_chunks=None):
        gnid_end += tid_ranges[-1][1]
        tid_dict[typename] = tid_ranges
-        gid_dict[typename] = np.array([gnid_start, gnid_end]).reshape([1,2])
+        gid_dict[typename] = np.array([gnid_start, gnid_end]).reshape([1, 2])
        gnid_start = gnid_end
        orig_num_chunks = len(tid_start)
@@ -500,14 +564,17 @@ def get_idranges(names, counts, num_chunks=None):
    if num_chunks is None:
        return tid_dict, gid_dict
-    assert num_chunks <= orig_num_chunks, \
+    assert (
-        'Specified number of chunks should be less/euqual than original numbers of ID ranges.'
+        num_chunks <= orig_num_chunks
+    ), "Specified number of chunks should be less/euqual than original numbers of ID ranges."
    chunk_list = np.array_split(np.arange(orig_num_chunks), num_chunks)
    for typename in tid_dict:
        orig_tid_ranges = tid_dict[typename]
        tid_ranges = []
        for idx in chunk_list:
-            tid_ranges.append((orig_tid_ranges[idx[0]][0], orig_tid_ranges[idx[-1]][-1]))
+            tid_ranges.append(
+                (orig_tid_ranges[idx[0]][0], orig_tid_ranges[idx[-1]][-1])
+            )
        tid_dict[typename] = tid_ranges
    return tid_dict, gid_dict
@@ -529,14 +596,14 @@ def memory_snapshot(tag, rank):
    MB = 1024 * 1024
    KB = 1024
-    peak = dgl.partition.get_peak_mem()*KB
+    peak = dgl.partition.get_peak_mem() * KB
    mem = psutil.virtual_memory()
    avail = mem.available / MB
    used = mem.used / MB
    total = mem.total / MB
-    mem_string = f'{total:.0f} (MB) total, {peak:.0f} (MB) peak, {used:.0f} (MB) used, {avail:.0f} (MB) avail'
+    mem_string = f"{total:.0f} (MB) total, {peak:.0f} (MB) peak, {used:.0f} (MB) used, {avail:.0f} (MB) avail"
-    logging.debug(f'[Rank: {rank} MEMORY_SNAPSHOT] {mem_string} - {tag}')
+    logging.debug(f"[Rank: {rank} MEMORY_SNAPSHOT] {mem_string} - {tag}")
 def map_partid_rank(partid, world_size):

--- a/tools/partition_algo/base.py
+++ b/tools/partition_algo/base.py
+import json
 from typing import Optional
 import pydantic as dt
-import json
 from dgl import DGLError
 class PartitionMeta(dt.BaseModel):
-    """ Metadata that describes the partition assignment results.
+    """Metadata that describes the partition assignment results.
    Regardless of the choice of partitioning algorithm, a metadata JSON file
    will be created in the output directory which includes the meta information
@@ -22,15 +24,17 @@ class PartitionMeta(dt.BaseModel):
    ...     part_meta = PartitionMeta(**(json.load(f)))
    """
    # version of metadata JSON.
-    version: Optional[str] = '1.0.0'
+    version: Optional[str] = "1.0.0"
    # number of partitions.
    num_parts: int
    # name of partition algorithm.
    algo_name: str
 def dump_partition_meta(part_meta, meta_file):
-    """ Dump partition metadata into json file.
+    """Dump partition metadata into json file.
    Parameters
    ----------
@@ -39,11 +43,12 @@ def dump_partition_meta(part_meta, meta_file):
    meta_file : str
        The target file to save data.
    """
-    with open(meta_file, 'w') as f:
+    with open(meta_file, "w") as f:
        json.dump(part_meta.dict(), f, sort_keys=True, indent=4)
 def load_partition_meta(meta_file):
-    """ Load partition metadata and do sanity check.
+    """Load partition metadata and do sanity check.
    Parameters
    ----------
@@ -60,14 +65,18 @@ def load_partition_meta(meta_file):
            part_meta = PartitionMeta(**(json.load(f)))
        except dt.ValidationError as e:
            raise DGLError(
-                f"Invalid partition metadata JSON. Error details: {e.json()}")
+                f"Invalid partition metadata JSON. Error details: {e.json()}"
-        if part_meta.version != '1.0.0':
+            )
+        if part_meta.version != "1.0.0":
            raise DGLError(
-                f"Invalid version[{part_meta.version}]. Supported versions: '1.0.0'")
+                f"Invalid version[{part_meta.version}]. Supported versions: '1.0.0'"
+            )
        if part_meta.num_parts <= 0:
            raise DGLError(
-                f"num_parts[{part_meta.num_parts}] should be greater than 0.")
+                f"num_parts[{part_meta.num_parts}] should be greater than 0."
-        if part_meta.algo_name not in ['random', 'metis']:
+            )
+        if part_meta.algo_name not in ["random", "metis"]:
            raise DGLError(
-                f"algo_name[{part_meta.num_parts}] is not supported.")
+                f"algo_name[{part_meta.num_parts}] is not supported."
+            )
        return part_meta
--- a/tools/partition_algo/random_partition.py
+++ b/tools/partition_algo/random_partition.py
@@ -6,10 +6,11 @@ import os
 import sys
 import numpy as np
-from base import PartitionMeta, dump_partition_meta
+from base import dump_partition_meta, PartitionMeta
 from distpartitioning import array_readwriter
 from files import setdir
 def _random_partition(metadata, num_parts):
    num_nodes_per_type = [sum(_) for _ in metadata["num_nodes_per_chunk"]]
    ntypes = metadata["node_type"]