Unverified Commit 3a2a5031 authored by peizhou001's avatar peizhou001 Committed by GitHub
Browse files

[API Deprecation]Deprecate candidates in dgl.distributed (#5116)

parent ab0c0ec6
......@@ -430,7 +430,7 @@ class DistGraph:
The example shows the creation of ``DistGraph`` in the standalone mode.
>>> dgl.distributed.partition_graph(g, 'graph_name', 1, num_hops=1, part_method='metis',
... out_path='output/', reshuffle=True)
... out_path='output/')
>>> g = dgl.distributed.DistGraph('graph_name', part_config='output/graph_name.json')
The example shows the creation of ``DistGraph`` in the distributed mode.
......@@ -513,7 +513,7 @@ class DistGraph:
assert self._client is not None, \
'Distributed module is not initialized. Please call dgl.distributed.initialize.'
self._g = _get_graph_from_shared_mem(self.graph_name)
self._gpb = get_shared_mem_partition_book(self.graph_name, self._g)
self._gpb = get_shared_mem_partition_book(self.graph_name)
if self._gpb is None:
self._gpb = gpb
self._client.map_shared_data(self._gpb)
......
......@@ -8,7 +8,7 @@ import numpy as np
from .. import backend as F
from .. import utils
from .._ffi.ndarray import empty_shared_mem
from ..base import EID, NID, DGLError
from ..base import DGLError
from ..ndarray import exist_shared_mem_array
from ..partition import NDArrayPartition
from .constants import DEFAULT_ETYPE, DEFAULT_NTYPE
......@@ -175,7 +175,7 @@ def _get_shared_mem_metadata(graph_name):
return is_range_part, part_id, num_partitions, node_map, edge_map
def get_shared_mem_partition_book(graph_name, graph_part):
def get_shared_mem_partition_book(graph_name):
"""Get a graph partition book from shared memory.
A graph partition book of a specific graph can be serialized to shared memory.
......@@ -185,8 +185,6 @@ def get_shared_mem_partition_book(graph_name, graph_part):
----------
graph_name : str
The name of the graph.
graph_part : DGLGraph
The graph structure of a partition.
Returns
-------
......@@ -225,9 +223,7 @@ def get_shared_mem_partition_book(graph_name, graph_part):
part_id, num_parts, node_map, edge_map, ntypes, etypes
)
else:
return BasicPartitionBook(
part_id, num_parts, node_map_data, edge_map_data, graph_part
)
raise TypeError("Only RangePartitionBook is supported currently.")
def get_node_partition_from_book(book, device):
......@@ -278,14 +274,10 @@ class GraphPartitionBook(ABC):
* the node IDs and the edge IDs that a partition has.
* the local IDs of nodes and edges in a partition.
Currently, there are two classes that implement ``GraphPartitionBook``:
``BasicGraphPartitionBook`` and ``RangePartitionBook``. ``BasicGraphPartitionBook``
stores the mappings between every individual node/edge ID and partition ID on
every machine, which usually consumes a lot of memory, while ``RangePartitionBook``
calculates the mapping between node/edge IDs and partition IDs based on some small
metadata because nodes/edges have been relabeled to have IDs in the same partition
fall in a contiguous ID range. ``RangePartitionBook`` is usually a preferred way to
provide mappings between node/edge IDs and partition IDs.
Currently, only one class implements ``GraphPartitionBook``:
``RangePartitionBook``. It calculates the mapping between node/edge IDs
and partition IDs from a small amount of metadata, because nodes/edges have
been relabeled so that IDs in the same partition fall in a contiguous ID range.
A graph partition book is constructed automatically when a graph is partitioned.
When a graph partition is loaded, a graph partition book is loaded as well.
......@@ -541,262 +533,6 @@ class GraphPartitionBook(ABC):
Homogeneous edge IDs.
"""
class BasicPartitionBook(GraphPartitionBook):
    """The most flexible way to store partition information.

    The partition book maintains the mapping of every single node ID and
    edge ID to a partition ID. This is very flexible at the cost of large
    memory consumption. On a large graph, the mapping consumes significant
    memory and this partition book is not recommended.

    Parameters
    ----------
    part_id : int
        partition ID of current partition book
    num_parts : int
        number of total partitions
    node_map : tensor
        global node ID mapping to partition ID
    edge_map : tensor
        global edge ID mapping to partition ID
    part_graph : DGLGraph
        The graph partition structure.
    """

    def __init__(self, part_id, num_parts, node_map, edge_map, part_graph):
        assert part_id >= 0, "part_id cannot be a negative number."
        assert num_parts > 0, "num_parts must be greater than zero."
        self._part_id = int(part_id)
        self._num_partitions = int(num_parts)
        # Dense per-ID lookup tables: element i is the partition that owns ID i.
        self._nid2partid = F.tensor(node_map)
        assert (
            F.dtype(self._nid2partid) == F.int64
        ), "the node map must be stored in an integer array"
        self._eid2partid = F.tensor(edge_map)
        assert (
            F.dtype(self._eid2partid) == F.int64
        ), "the edge map must be stored in an integer array"
        # Get meta data of the partition book.
        # nid_count[i] / eid_count[i] hold the number of nodes/edges assigned
        # to partition i.
        # NOTE(review): np.unique drops values that never occur, so indexing
        # the counts by partition ID assumes every partition owns at least one
        # node and one edge — confirm for degenerate partitionings.
        self._partition_meta_data = []
        _, nid_count = np.unique(
            F.asnumpy(self._nid2partid), return_counts=True
        )
        _, eid_count = np.unique(
            F.asnumpy(self._eid2partid), return_counts=True
        )
        for partid in range(self._num_partitions):
            part_info = {}
            part_info["machine_id"] = partid
            part_info["num_nodes"] = int(nid_count[partid])
            part_info["num_edges"] = int(eid_count[partid])
            self._partition_meta_data.append(part_info)
        # Get partid2nids: sort all global node IDs by owning partition, then
        # slice the sorted array into one contiguous chunk per partition.
        self._partid2nids = []
        sorted_nid = F.tensor(np.argsort(F.asnumpy(self._nid2partid)))
        start = 0
        for offset in nid_count:
            part_nids = sorted_nid[start : start + offset]
            start += offset
            self._partid2nids.append(part_nids)
        # Get partid2eids (same slicing scheme as partid2nids, for edges).
        self._partid2eids = []
        sorted_eid = F.tensor(np.argsort(F.asnumpy(self._eid2partid)))
        start = 0
        for offset in eid_count:
            part_eids = sorted_eid[start : start + offset]
            start += offset
            self._partid2eids.append(part_eids)
        # Get nidg2l: global-to-local node ID mapping, materialized only for
        # the local partition; slots for remote partitions stay None.
        self._nidg2l = [None] * self._num_partitions
        global_id = part_graph.ndata[NID]
        max_global_id = np.amax(F.asnumpy(global_id))
        # TODO(chao): support int32 index
        g2l = F.zeros((max_global_id + 1), F.int64, F.context(global_id))
        g2l = F.scatter_row(g2l, global_id, F.arange(0, len(global_id)))
        self._nidg2l[self._part_id] = g2l
        # Get eidg2l: global-to-local edge ID mapping for the local partition.
        self._eidg2l = [None] * self._num_partitions
        global_id = part_graph.edata[EID]
        max_global_id = np.amax(F.asnumpy(global_id))
        # TODO(chao): support int32 index
        g2l = F.zeros((max_global_id + 1), F.int64, F.context(global_id))
        g2l = F.scatter_row(g2l, global_id, F.arange(0, len(global_id)))
        self._eidg2l[self._part_id] = g2l
        # node size and edge size
        self._edge_size = len(self.partid2eids(self._part_id))
        self._node_size = len(self.partid2nids(self._part_id))

    def shared_memory(self, graph_name):
        """Move data to shared memory.

        Copies the metadata and the node/edge ID-to-partition maps into
        shared memory (keyed by ``graph_name``) and rebinds the instance
        attributes to the shared copies.
        """
        (
            self._meta,
            self._nid2partid,
            self._eid2partid,
        ) = _move_metadata_to_shared_mem(
            graph_name,
            self._num_nodes(),
            self._num_edges(),
            self._part_id,
            self._num_partitions,
            self._nid2partid,
            self._eid2partid,
            # Not a range partition book (cf. is_range_part in the shared
            # metadata) — presumably read back on attach; verify against
            # _get_shared_mem_metadata.
            False,
        )

    def num_partitions(self):
        """Return the number of partitions."""
        return self._num_partitions

    def metadata(self):
        """Return the partition meta data.

        One dict per partition with keys ``machine_id``, ``num_nodes`` and
        ``num_edges`` (built in ``__init__``).
        """
        return self._partition_meta_data

    def _num_nodes(self, ntype=DEFAULT_NTYPE):
        """The total number of nodes."""
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return len(self._nid2partid)

    def _num_edges(self, etype=DEFAULT_ETYPE):
        """The total number of edges."""
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return len(self._eid2partid)

    def map_to_per_ntype(self, ids):
        """Map global homogeneous node IDs to node type IDs.

        The graph is homogeneous, so the type ID is always 0 and the
        per-type IDs equal the input IDs.

        Returns
            type_ids, per_type_ids
        """
        return F.zeros((len(ids),), F.int32, F.cpu()), ids

    def map_to_per_etype(self, ids):
        """Map global homogeneous edge IDs to edge type IDs.

        The graph is homogeneous, so the type ID is always 0 and the
        per-type IDs equal the input IDs.

        Returns
            type_ids, per_type_ids
        """
        return F.zeros((len(ids),), F.int32, F.cpu()), ids

    def map_to_homo_nid(self, ids, ntype=DEFAULT_NTYPE):
        """Map per-node-type IDs to global node IDs in the homogeneous format.

        Identity mapping: the graph has a single node type.
        """
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return ids

    def map_to_homo_eid(self, ids, etype=DEFAULT_ETYPE):
        """Map per-edge-type IDs to global edge IDs in the homogeneous format.

        Identity mapping: the graph has a single edge type.
        """
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return ids

    def nid2partid(self, nids, ntype=DEFAULT_NTYPE):
        """From global node IDs to partition IDs."""
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return F.gather_row(self._nid2partid, nids)

    def eid2partid(self, eids, etype=DEFAULT_ETYPE):
        """From global edge IDs to partition IDs."""
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return F.gather_row(self._eid2partid, eids)

    def partid2nids(self, partid, ntype=DEFAULT_NTYPE):
        """From partition id to global node IDs."""
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        return self._partid2nids[partid]

    def partid2eids(self, partid, etype=DEFAULT_ETYPE):
        """From partition id to global edge IDs."""
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return self._partid2eids[partid]

    def nid2localnid(self, nids, partid, ntype=DEFAULT_NTYPE):
        """Get local node IDs within the given partition.

        Only the local partition's global-to-local map is materialized, so
        this raises for any remote ``partid``.
        """
        assert (
            ntype == DEFAULT_NTYPE
        ), "Base partition book only supports homogeneous graph."
        if partid != self._part_id:
            raise RuntimeError(
                "Now GraphPartitionBook does not support \
                getting remote tensor of nid2localnid."
            )
        return F.gather_row(self._nidg2l[partid], nids)

    def eid2localeid(self, eids, partid, etype=DEFAULT_ETYPE):
        """Get the local edge ids within the given partition.

        Only the local partition's global-to-local map is materialized, so
        this raises for any remote ``partid``.
        """
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        if partid != self._part_id:
            raise RuntimeError(
                "Now GraphPartitionBook does not support \
                getting remote tensor of eid2localeid."
            )
        return F.gather_row(self._eidg2l[partid], eids)

    @property
    def partid(self):
        """Get the current partition ID."""
        return self._part_id

    @property
    def ntypes(self):
        """Get the list of node types."""
        return [DEFAULT_NTYPE]

    @property
    def etypes(self):
        """Get the list of edge types."""
        return [DEFAULT_ETYPE[1]]

    @property
    def canonical_etypes(self):
        """Get the list of canonical edge types

        Returns
        -------
        list[(str, str, str)]
            A list of canonical etypes
        """
        return [DEFAULT_ETYPE]

    def to_canonical_etype(self, etype):
        """Convert an edge type to the corresponding canonical edge type.

        Parameters
        ----------
        etype : str or (str, str, str)
            The edge type

        Returns
        -------
        (str, str, str)
            The corresponding canonical edge type
        """
        assert etype in (
            DEFAULT_ETYPE,
            DEFAULT_ETYPE[1],
        ), "Base partition book only supports homogeneous graph."
        return self.canonical_etypes[0]
class RangePartitionBook(GraphPartitionBook):
"""This partition book supports more efficient storage of partition information.
......
This diff is collapsed.
......@@ -682,7 +682,7 @@ elif mode == "client":
dgl.distributed.initialize(ip_config, net_type=net_type)
gpb, graph_name, _, _ = load_partition_book(
graph_path + "/{}.json".format(graph_name), part_id, None
graph_path + "/{}.json".format(graph_name), part_id
)
g = dgl.distributed.DistGraph(graph_name, gpb=gpb)
......
......@@ -119,7 +119,7 @@ def run_client_empty(
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_empty(g, num_clients, num_nodes, num_edges)
......@@ -187,7 +187,7 @@ def run_client(
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph(g, num_clients, num_nodes, num_edges)
......@@ -206,7 +206,7 @@ def run_emb_client(
os.environ["DGL_GROUP_ID"] = str(group_id)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_emb(g, num_clients, num_nodes, num_edges)
......@@ -230,7 +230,7 @@ def run_optim_client(
backend="gloo", rank=rank, world_size=world_size
)
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_optim_store(rank, num_nodes, optimizer_states, save)
......@@ -279,7 +279,7 @@ def run_client_hierarchy(
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
node_mask = F.tensor(node_mask)
......@@ -687,7 +687,7 @@ def run_client_hetero(
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("kv_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
check_dist_graph_hetero(g, num_clients, num_nodes, num_edges)
......
......@@ -101,7 +101,7 @@ def check_rpc_sampling(tmpdir, num_server):
num_hops = 1
partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=False)
num_hops=num_hops, part_method='metis')
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -132,7 +132,7 @@ def check_rpc_find_edges_shuffle(tmpdir, num_server):
orig_nid, orig_eid = partition_graph(g, 'test_find_edges', num_parts, tmpdir,
num_hops=1, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -178,7 +178,7 @@ def check_rpc_hetero_find_edges_shuffle(tmpdir, num_server):
orig_nid, orig_eid = partition_graph(g, 'test_find_edges', num_parts, tmpdir,
num_hops=1, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -227,7 +227,7 @@ def check_rpc_get_degree_shuffle(tmpdir, num_server):
num_parts = num_server
orig_nid, _ = partition_graph(g, 'test_get_degrees', num_parts, tmpdir,
num_hops=1, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=1, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -281,7 +281,7 @@ def check_rpc_sampling_shuffle(tmpdir, num_server, num_groups=1):
num_hops = 1
orig_nids, orig_eids = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -379,7 +379,7 @@ def check_rpc_hetero_sampling_shuffle(tmpdir, num_server):
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -431,7 +431,7 @@ def check_rpc_hetero_sampling_empty_shuffle(tmpdir, num_server):
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -461,7 +461,7 @@ def check_rpc_hetero_etype_sampling_shuffle(tmpdir, num_server, graph_formats=No
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True,
num_hops=num_hops, part_method='metis', return_mapping=True,
graph_formats=graph_formats)
pserver_list = []
......@@ -515,7 +515,7 @@ def check_rpc_hetero_etype_sampling_empty_shuffle(tmpdir, num_server):
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis',
reshuffle=True, return_mapping=True)
return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -607,7 +607,7 @@ def check_rpc_bipartite_sampling_empty(tmpdir, num_server):
num_hops = 1
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -641,7 +641,7 @@ def check_rpc_bipartite_sampling_shuffle(tmpdir, num_server):
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -692,7 +692,7 @@ def check_rpc_bipartite_etype_sampling_empty(tmpdir, num_server):
num_hops = 1
orig_nids, _ = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -727,7 +727,7 @@ def check_rpc_bipartite_etype_sampling_shuffle(tmpdir, num_server):
num_hops = 1
orig_nid_map, orig_eid_map = partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=num_hops, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -795,7 +795,7 @@ def test_rpc_sampling_shuffle(num_server):
check_rpc_bipartite_etype_sampling_empty(Path(tmpdirname), num_server)
check_rpc_bipartite_etype_sampling_shuffle(Path(tmpdirname), num_server)
def check_standalone_sampling(tmpdir, reshuffle):
def check_standalone_sampling(tmpdir):
g = CitationGraphDataset("cora")[0]
prob = np.maximum(np.random.randn(g.num_edges()), 0)
mask = (prob > 0)
......@@ -804,7 +804,7 @@ def check_standalone_sampling(tmpdir, reshuffle):
num_parts = 1
num_hops = 1
partition_graph(g, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
num_hops=num_hops, part_method='metis')
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt")
......@@ -829,7 +829,7 @@ def check_standalone_sampling(tmpdir, reshuffle):
assert (prob[eid] > 0).all()
dgl.distributed.exit_client()
def check_standalone_etype_sampling(tmpdir, reshuffle):
def check_standalone_etype_sampling(tmpdir):
hg = CitationGraphDataset('cora')[0]
prob = np.maximum(np.random.randn(hg.num_edges()), 0)
mask = (prob > 0)
......@@ -839,7 +839,7 @@ def check_standalone_etype_sampling(tmpdir, reshuffle):
num_hops = 1
partition_graph(hg, 'test_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
num_hops=num_hops, part_method='metis')
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt")
dist_graph = DistGraph("test_sampling", part_config=tmpdir / 'test_sampling.json')
......@@ -863,7 +863,7 @@ def check_standalone_etype_sampling(tmpdir, reshuffle):
assert (prob[eid] > 0).all()
dgl.distributed.exit_client()
def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
def check_standalone_etype_sampling_heterograph(tmpdir):
hg = CitationGraphDataset('cora')[0]
num_parts = 1
num_hops = 1
......@@ -872,7 +872,7 @@ def check_standalone_etype_sampling_heterograph(tmpdir, reshuffle):
('paper', 'cite-by', 'paper'): (dst, src)},
{'paper': hg.number_of_nodes()})
partition_graph(new_hg, 'test_hetero_sampling', num_parts, tmpdir,
num_hops=num_hops, part_method='metis', reshuffle=reshuffle)
num_hops=num_hops, part_method='metis')
os.environ['DGL_DIST_MODE'] = 'standalone'
dgl.distributed.initialize("rpc_ip_config.txt")
dist_graph = DistGraph("test_hetero_sampling", part_config=tmpdir / 'test_hetero_sampling.json')
......@@ -892,8 +892,7 @@ def test_standalone_sampling():
import tempfile
os.environ['DGL_DIST_MODE'] = 'standalone'
with tempfile.TemporaryDirectory() as tmpdirname:
check_standalone_sampling(Path(tmpdirname), False)
check_standalone_sampling(Path(tmpdirname), True)
check_standalone_sampling(Path(tmpdirname))
def start_in_subgraph_client(rank, tmpdir, disable_shared_mem, nodes):
gpb = None
......@@ -917,7 +916,7 @@ def check_rpc_in_subgraph_shuffle(tmpdir, num_server):
num_parts = num_server
orig_nid, orig_eid = partition_graph(g, 'test_in_subgraph', num_parts, tmpdir,
num_hops=1, part_method='metis', reshuffle=True, return_mapping=True)
num_hops=1, part_method='metis', return_mapping=True)
pserver_list = []
ctx = mp.get_context('spawn')
......@@ -964,23 +963,21 @@ def test_standalone_etype_sampling():
import tempfile
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling_heterograph(Path(tmpdirname), True)
check_standalone_etype_sampling_heterograph(Path(tmpdirname))
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling(Path(tmpdirname), True)
check_standalone_etype_sampling(Path(tmpdirname))
if __name__ == "__main__":
import tempfile
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling_heterograph(Path(tmpdirname), True)
check_standalone_etype_sampling_heterograph(Path(tmpdirname))
with tempfile.TemporaryDirectory() as tmpdirname:
os.environ['DGL_DIST_MODE'] = 'standalone'
check_standalone_etype_sampling(Path(tmpdirname), True)
check_standalone_etype_sampling(Path(tmpdirname), False)
check_standalone_sampling(Path(tmpdirname), True)
check_standalone_sampling(Path(tmpdirname), False)
check_standalone_etype_sampling(Path(tmpdirname))
check_standalone_sampling(Path(tmpdirname))
os.environ['DGL_DIST_MODE'] = 'distributed'
check_rpc_sampling(Path(tmpdirname), 2)
check_rpc_sampling(Path(tmpdirname), 1)
......
......@@ -162,7 +162,6 @@ def test_standalone():
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=True,
return_mapping=True,
)
part_config = os.path.join(test_dir, "test_sampling.json")
......@@ -262,7 +261,6 @@ def check_neg_dataloader(g, num_server, num_workers):
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=True,
return_mapping=True,
)
part_config = os.path.join(test_dir, "test_sampling.json")
......@@ -317,10 +315,9 @@ def check_neg_dataloader(g, num_server, num_workers):
@pytest.mark.parametrize("num_server", [3])
@pytest.mark.parametrize("num_workers", [0, 4])
@pytest.mark.parametrize("drop_last", [True, False])
@pytest.mark.parametrize("reshuffle", [True, False])
@pytest.mark.parametrize("num_groups", [1])
def test_dist_dataloader(
num_server, num_workers, drop_last, reshuffle, num_groups
num_server, num_workers, drop_last, num_groups
):
reset_envs()
# No multiple partitions on single machine for
......@@ -343,7 +340,6 @@ def test_dist_dataloader(
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=reshuffle,
return_mapping=True,
)
......@@ -560,7 +556,6 @@ def check_dataloader(g, num_server, num_workers, dataloader_type):
test_dir,
num_hops=num_hops,
part_method="metis",
reshuffle=True,
return_mapping=True,
)
part_config = os.path.join(test_dir, "test_sampling.json")
......
......@@ -18,8 +18,8 @@ if os.name != "nt":
import struct
# Create an one-part Graph
node_map = F.tensor([0, 0, 0, 0, 0, 0], F.int64)
edge_map = F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)
node_map = {'_N': F.tensor([[0, 6]], F.int64)}
edge_map = {('_N','_E','_N'): F.tensor([[0, 7]], F.int64)}
global_nid = F.tensor([0, 1, 2, 3, 4, 5], F.int64)
global_eid = F.tensor([0, 1, 2, 3, 4, 5, 6], F.int64)
......@@ -36,8 +36,10 @@ g.add_edges(2, 5) # 6
g.ndata[dgl.NID] = global_nid
g.edata[dgl.EID] = global_eid
gpb = dgl.distributed.graph_partition_book.BasicPartitionBook(
part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map, part_graph=g
gpb = dgl.distributed.graph_partition_book.RangePartitionBook(
part_id=0, num_parts=1, node_map=node_map, edge_map=edge_map,
ntypes={ntype: i for i, ntype in enumerate(g.ntypes)},
etypes={etype: i for i, etype in enumerate(g.canonical_etypes)}
)
node_policy = dgl.distributed.PartitionPolicy(
......@@ -110,8 +112,8 @@ def test_partition_policy():
F.asnumpy(eid_partid),
F.asnumpy(F.tensor([0, 0, 0, 0, 0, 0, 0], F.int64)),
)
assert node_policy.get_part_size() == len(node_map)
assert edge_policy.get_part_size() == len(edge_map)
assert node_policy.get_part_size() == len(local_nid)
assert edge_policy.get_part_size() == len(local_eid)
def start_server(server_id, num_clients, num_servers):
......
......@@ -3,18 +3,20 @@ import os
import backend as F
import torch as th
import dgl
import json
import numpy as np
import pytest
import tempfile
from dgl import function as fn
from dgl.distributed import (
load_partition,
load_partition_book,
load_partition_feats,
partition_graph,
)
from dgl.distributed.graph_partition_book import (
DEFAULT_ETYPE,
DEFAULT_NTYPE,
BasicPartitionBook,
EdgePartitionPolicy,
HeteroDataName,
NodePartitionPolicy,
......@@ -226,7 +228,6 @@ def check_hetero_partition(
"/tmp/partition",
num_hops=num_hops,
part_method=part_method,
reshuffle=True,
return_mapping=True,
num_trainers_per_machine=num_trainers_per_machine,
graph_formats=graph_formats,
......@@ -328,7 +329,6 @@ def check_hetero_partition(
def check_partition(
g,
part_method,
reshuffle,
num_parts=4,
num_trainers_per_machine=1,
load_feats=True,
......@@ -352,7 +352,6 @@ def check_partition(
"/tmp/partition",
num_hops=num_hops,
part_method=part_method,
reshuffle=reshuffle,
return_mapping=True,
num_trainers_per_machine=num_trainers_per_machine,
graph_formats=graph_formats,
......@@ -445,24 +444,16 @@ def check_partition(
assert F.shape(orig_eids1)[0] == F.shape(orig_eids2)[0]
assert np.all(F.asnumpy(orig_eids1) == F.asnumpy(orig_eids2))
if reshuffle:
local_orig_nids = orig_nids[part_g.ndata[dgl.NID]]
local_orig_eids = orig_eids[part_g.edata[dgl.EID]]
part_g.ndata["feats"] = F.gather_row(
g.ndata["feats"], local_orig_nids
)
part_g.edata["feats"] = F.gather_row(
g.edata["feats"], local_orig_eids
)
local_nodes = orig_nids[local_nodes]
local_edges = orig_eids[local_edges]
else:
part_g.ndata["feats"] = F.gather_row(
g.ndata["feats"], part_g.ndata[dgl.NID]
)
part_g.edata["feats"] = F.gather_row(
g.edata["feats"], part_g.edata[dgl.NID]
)
local_orig_nids = orig_nids[part_g.ndata[dgl.NID]]
local_orig_eids = orig_eids[part_g.edata[dgl.EID]]
part_g.ndata["feats"] = F.gather_row(
g.ndata["feats"], local_orig_nids
)
part_g.edata["feats"] = F.gather_row(
g.edata["feats"], local_orig_eids
)
local_nodes = orig_nids[local_nodes]
local_edges = orig_eids[local_edges]
part_g.update_all(fn.copy_u("feats", "msg"), fn.sum("msg", "h"))
part_g.update_all(fn.copy_e("feats", "msg"), fn.sum("msg", "eh"))
......@@ -490,41 +481,37 @@ def check_partition(
assert np.all(F.asnumpy(true_feats) == F.asnumpy(edata))
# This only works if node/edge IDs are shuffled.
if reshuffle:
shuffled_labels.append(node_feats["_N/labels"])
shuffled_edata.append(edge_feats["_N:_E:_N/feats"])
shuffled_labels.append(node_feats["_N/labels"])
shuffled_edata.append(edge_feats["_N:_E:_N/feats"])
# Verify that we can reconstruct node/edge data for original IDs.
if reshuffle:
shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
orig_labels = np.zeros(
shuffled_labels.shape, dtype=shuffled_labels.dtype
)
orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
assert np.all(orig_labels == F.asnumpy(g.ndata["labels"]))
assert np.all(orig_edata == F.asnumpy(g.edata["feats"]))
if reshuffle:
node_map = []
edge_map = []
for i, (num_nodes, num_edges) in enumerate(part_sizes):
node_map.append(np.ones(num_nodes) * i)
edge_map.append(np.ones(num_edges) * i)
node_map = np.concatenate(node_map)
edge_map = np.concatenate(edge_map)
nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
assert F.dtype(nid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(nid2pid) == node_map)
eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
assert F.dtype(eid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(eid2pid) == edge_map)
shuffled_labels = F.asnumpy(F.cat(shuffled_labels, 0))
shuffled_edata = F.asnumpy(F.cat(shuffled_edata, 0))
orig_labels = np.zeros(
shuffled_labels.shape, dtype=shuffled_labels.dtype
)
orig_edata = np.zeros(shuffled_edata.shape, dtype=shuffled_edata.dtype)
orig_labels[F.asnumpy(orig_nids)] = shuffled_labels
orig_edata[F.asnumpy(orig_eids)] = shuffled_edata
assert np.all(orig_labels == F.asnumpy(g.ndata["labels"]))
assert np.all(orig_edata == F.asnumpy(g.edata["feats"]))
node_map = []
edge_map = []
for i, (num_nodes, num_edges) in enumerate(part_sizes):
node_map.append(np.ones(num_nodes) * i)
edge_map.append(np.ones(num_edges) * i)
node_map = np.concatenate(node_map)
edge_map = np.concatenate(edge_map)
nid2pid = gpb.nid2partid(F.arange(0, len(node_map)))
assert F.dtype(nid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(nid2pid) == node_map)
eid2pid = gpb.eid2partid(F.arange(0, len(edge_map)))
assert F.dtype(eid2pid) in (F.int32, F.int64)
assert np.all(F.asnumpy(eid2pid) == edge_map)
@pytest.mark.parametrize("part_method", ["metis", "random"])
@pytest.mark.parametrize("reshuffle", [True, False])
@pytest.mark.parametrize("num_parts", [1, 4])
@pytest.mark.parametrize("num_trainers_per_machine", [1, 4])
@pytest.mark.parametrize("load_feats", [True, False])
......@@ -533,7 +520,6 @@ def check_partition(
)
def test_partition(
part_method,
reshuffle,
num_parts,
num_trainers_per_machine,
load_feats,
......@@ -546,7 +532,6 @@ def test_partition(
check_partition(
g,
part_method,
reshuffle,
num_parts,
num_trainers_per_machine,
load_feats,
......@@ -563,31 +548,6 @@ def test_partition(
)
reset_envs()
def test_BasicPartitionBook():
    """Smoke-test BasicPartitionBook construction and its type/policy API.

    Builds a two-partition book over a random graph and checks the exposed
    edge types, then verifies that EdgePartitionPolicy rejects a plain etype
    string but accepts the canonical etype tuple.
    """
    part_id = 0
    num_parts = 2
    nid2pid = np.random.choice(num_parts, 1000)
    eid2pid = np.random.choice(num_parts, 5000)
    graph = dgl.rand_graph(1000, 5000)
    graph = dgl.node_subgraph(graph, F.arange(0, graph.num_nodes()))
    gpb = BasicPartitionBook(part_id, num_parts, nid2pid, eid2pid, graph)
    c_etype = ("_N", "_E", "_N")
    assert gpb.etypes == ["_E"]
    assert gpb.canonical_etypes == [c_etype]
    node_policy = NodePartitionPolicy(gpb, "_N")
    assert node_policy.type_name == "_N"
    # A bare etype string must be rejected; only the canonical tuple works.
    raised = False
    try:
        EdgePartitionPolicy(gpb, "_E")
    except AssertionError:
        raised = True
    assert raised
    edge_policy = EdgePartitionPolicy(gpb, c_etype)
    assert edge_policy.type_name == c_etype
def test_RangePartitionBook():
part_id = 1
num_parts = 2
......@@ -699,3 +659,27 @@ def test_RangePartitionBook():
assert expect_except
data_name = HeteroDataName(False, c_etype, "feat")
assert data_name.get_type() == c_etype
def test_UnknownPartitionBook():
    """load_partition_book() must raise TypeError for an unsupported book.

    Writes a partition config whose node/edge maps are in an unknown
    (dict-based) format and verifies that loading it fails with TypeError.
    Any other exception propagates unchanged.
    """
    node_map = {'_N': {0: 0, 1: 1, 2: 2}}
    edge_map = {'_N:_E:_N': {0: 0, 1: 1, 2: 2}}
    part_metadata = {
        "num_parts": 1,
        "num_nodes": len(node_map),
        "num_edges": len(edge_map),
        "node_map": node_map,
        "edge_map": edge_map,
        "graph_name": "test_graph",
    }
    with tempfile.TemporaryDirectory() as test_dir:
        part_config = os.path.join(test_dir, "test_graph.json")
        with open(part_config, "w") as file:
            json.dump(part_metadata, file, indent=4)
        # Bug fix: the original try/except passed silently when no exception
        # was raised at all; the test must assert TypeError actually occurs.
        raised = False
        try:
            load_partition_book(part_config, 0)
        except TypeError:
            raised = True
        assert raised, "Expected TypeError for an unknown partition book."
......@@ -92,7 +92,7 @@ def run_client(graph_name, cli_id, part_id, server_count):
os.environ["DGL_NUM_SERVER"] = str(server_count)
dgl.distributed.initialize("optim_ip_config.txt")
gpb, graph_name, _, _ = load_partition_book(
"/tmp/dist_graph/{}.json".format(graph_name), part_id, None
"/tmp/dist_graph/{}.json".format(graph_name), part_id
)
g = DistGraph(graph_name, gpb=gpb)
policy = dgl.distributed.PartitionPolicy("node", g.get_partition_book())
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment