Unverified Commit fbbca994 authored by Cheng Wan's avatar Cheng Wan Committed by GitHub
Browse files

[Feature] METIS Partition with Communication Volume Minimization (#3821)

* upd

* upd

* upd

* upd

* upd

* fix OpenMP compatibility issues

* typo

* partition

* misc

* fix typo

* num_parts=1

* import torch

* long

* print info

* print info

* print info

* upd

* remove debug code

* revert partition.py

* fix cut count

* fix cut count

* Revert "fix cut count"

This reverts commit 10926b4fd48f45c8f1ddb58be7db6c22e653effd.

* Revert "fix cut count"

This reverts commit 76465283bef093a2b4209ad70dd15d2437b2ec8a.

* type of deprecate

* typo in deprecate info

* fix typo

* use cv for partitioning

* CE

* no message

* revert

* typo

* add objtype

* no message

* fix bug

* fix bug

* fix bug

* ?

* semicolon

* drop tensors

* no message

* backward

* backward

* max op

* store X.shape

* th

* test

* Revert "test"

This reverts commit 92b3b2f64a3a1128590098fa03ce429c5466e6ce.

* test

* tolist

* debug

* to cuda

* tuple

* fix bug

* remove X

* no message

* fix bug

* workload balance

* Revert "workload balance"

This reverts commit d7f8e4a16ba2a7eabb4a9bb945523bfe6623e723.

* reverse

* Revert "reverse"

This reverts commit 8a71cf25685aa7d889b9b8881b46f7a16b7d6e6d.

* Revert "Revert "reverse""

This reverts commit 196b143932d5cf9813576ece7c990b63d322d063.

* Revert "Revert "Revert "reverse"""

This reverts commit cf9e89a07013582056e7cde235e51331aca7fa9c.

* no message

* Merge commit '5498cf05'

# Conflicts:
#	python/dgl/distributed/partition.py

* Revert "Merge commit '5498cf05

'"

This reverts commit f79be2ad777897c7025b28308454cad81ad6bb27.

* fix bug

* third party

* no message

* try to avoid memory leak

* try to avoid memory leak

* avoid memory leak with no hope

* Revert "avoid memory leak with no hope"

This reverts commit c77befe9479f46758e744642f66dd209b50eef7d.

* no message

* Revert "no message"

This reverts commit 478cb28fe25fb1002b2f1dc202bb9bdaad8b2a56.

* del

* Revert "del"

This reverts commit 1b468e45ce646b400ff3ffa61a0b2da058b3bdfd.

* no message

* no message

* Revert "no message"

This reverts commit 92e4f5561ed42da0606618b2fff9f1ad5ed439d9.

* third party

* document

* Update metis_partition.cc

* Update metis_partition_hetero.cc

* Update metis_partition_hetero.cc

* Update partition.py

* Update partition.py

* Update partition.py
Co-authored-by: default avataryzh119 <expye@outlook.com>
Co-authored-by: default avatarchwan-rice <54331508+chwan-rice@users.noreply.github.com>
Co-authored-by: default avatarJinjing Zhou <VoVAllen@users.noreply.github.com>
Co-authored-by: default avatarDa Zheng <zhengda1936@gmail.com>
parent ae3316c8
......@@ -293,7 +293,7 @@ def _set_trainer_ids(g, sim_g, node_parts):
def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method="metis",
reshuffle=True, balance_ntypes=None, balance_edges=False, return_mapping=False,
num_trainers_per_machine=1):
num_trainers_per_machine=1, objtype='cut'):
''' Partition a graph for distributed training and store the partitions on files.
The partitioning occurs in three steps: 1) run a partition algorithm (e.g., Metis) to
......@@ -473,6 +473,9 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
each node will be stored in the node feature 'trainer_id'. Then the partitions of trainers
on the same machine will be coalesced into one larger partition. The final number of
partitions is `num_part`.
objtype : str, "cut" or "vol"
Set the objective as edge-cut minimization or communication volume minimization. This
argument is used by the Metis algorithm.
Returns
-------
......@@ -532,6 +535,9 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
bal_ntypes = sim_g.ndata[NTYPE]
return sim_g, bal_ntypes
if objtype not in ['cut', 'vol']:
raise ValueError
if not reshuffle:
dgl_warning("The argument reshuffle will be deprecated in the next release. "
"For heterogeneous graphs, reshuffle must be enabled.")
......@@ -583,7 +589,7 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
sim_g, num_parts * num_trainers_per_machine,
balance_ntypes=balance_ntypes,
balance_edges=balance_edges,
mode='k-way')
mode='k-way', objtype=objtype)
_set_trainer_ids(g, sim_g, node_parts)
# And then coalesce the partitions of trainers on the same machine into one
......@@ -592,7 +598,8 @@ def partition_graph(g, graph_name, num_parts, out_path, num_hops=1, part_method=
else:
node_parts = metis_partition_assignment(sim_g, num_parts,
balance_ntypes=balance_ntypes,
balance_edges=balance_edges)
balance_edges=balance_edges,
objtype=objtype)
print('Assigning nodes to METIS partitions takes {:.3f}s, peak mem: {:.3f} GB'.format(
time.time() - start, get_peak_mem()))
else:
......
......@@ -248,7 +248,8 @@ def get_peak_mem():
return int(mem) / 1024 / 1024
return 0.0
def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False, mode="k-way"):
def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False,
mode="k-way", objtype='cut'):
''' This assigns nodes to different partitions with Metis partitioning algorithm.
When performing Metis partitioning, we can put some constraint on the partitioning.
......@@ -275,6 +276,9 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False, m
Indicate whether to balance the edges.
mode : str, "k-way" or "recursive"
Whether use multilevel recursive bisection or multilevel k-way paritioning.
objtype : str, "cut" or "vol"
Set the objective as edge-cut minimization or communication volume minimization. This
argument is used by the Metis algorithm.
Returns
-------
......@@ -334,7 +338,7 @@ def metis_partition_assignment(g, k, balance_ntypes=None, balance_edges=False, m
time.time() - start, get_peak_mem()))
start = time.time()
node_part = _CAPI_DGLMetisPartition_Hetero(sym_g._graph, k, vwgt, mode)
node_part = _CAPI_DGLMetisPartition_Hetero(sym_g._graph, k, vwgt, mode, (objtype == 'cut'))
print('Metis partitioning: {:.3f} seconds, peak memory: {:.3f} GB'.format(
time.time() - start, get_peak_mem()))
if len(node_part) == 0:
......
......@@ -15,7 +15,7 @@ namespace dgl {
#if !defined(_WIN32)
IdArray MetisPartition(GraphPtr g, int k, NDArray vwgt_arr) {
IdArray MetisPartition(GraphPtr g, int k, NDArray vwgt_arr, bool obj_cut) {
// The index type of Metis needs to be compatible with DGL index type.
CHECK_EQ(sizeof(idx_t), sizeof(dgl_id_t));
ImmutableGraphPtr ig = std::dynamic_pointer_cast<ImmutableGraph>(g);
......@@ -50,6 +50,12 @@ IdArray MetisPartition(GraphPtr g, int k, NDArray vwgt_arr) {
options[METIS_OPTION_NIPARTS] = 1;
options[METIS_OPTION_DROPEDGES] = 1;
if (obj_cut) {
options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
} else {
options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_VOL;
}
int ret = METIS_PartGraphKway(&nvtxs, // The number of vertices
&ncon, // The number of balancing constraints.
xadj, // indptr
......@@ -65,10 +71,17 @@ IdArray MetisPartition(GraphPtr g, int k, NDArray vwgt_arr) {
&objval, // the edge-cut or the total communication volume of
// the partitioning solution
part);
LOG(INFO) << "Partition a graph with " << g->NumVertices()
<< " nodes and " << g->NumEdges()
<< " edges into " << k
<< " parts and get " << objval << " edge cuts";
if (obj_cut) {
LOG(INFO) << "Partition a graph with " << g->NumVertices() << " nodes and "
<< g->NumEdges() << " edges into " << k << " parts and "
<< "get " << objval << " edge cuts";
} else {
LOG(INFO) << "Partition a graph with " << g->NumVertices() << " nodes and "
<< g->NumEdges() << " edges into " << k << " parts and "
<< "the communication volume is " << objval;
}
switch (ret) {
case METIS_OK:
return part_arr;
......@@ -90,8 +103,9 @@ DGL_REGISTER_GLOBAL("transform._CAPI_DGLMetisPartition")
GraphRef g = args[0];
int k = args[1];
NDArray vwgt = args[2];
bool obj_cut = args[3];
#if !defined(_WIN32)
*rv = MetisPartition(g.sptr(), k, vwgt);
*rv = MetisPartition(g.sptr(), k, vwgt, obj_cut);
#else
LOG(FATAL) << "Metis partition does not support Windows.";
#endif // !defined(_WIN32)
......
......@@ -19,7 +19,8 @@ namespace transform {
#if !defined(_WIN32)
IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr, const std::string &mode) {
IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr,
const std::string &mode, bool obj_cut) {
// Mode can only be "k-way" or "recursive"
CHECK(mode == "k-way" || mode == "recursive")
<< "mode can only be \"k-way\" or \"recursive\"";
......@@ -59,6 +60,12 @@ IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr, const std::strin
options[METIS_OPTION_NIPARTS] = 1;
options[METIS_OPTION_DROPEDGES] = 1;
if (obj_cut) {
options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_CUT;
} else {
options[METIS_OPTION_OBJTYPE] = METIS_OBJTYPE_VOL;
}
int ret = partition_func(
&nvtxs, // The number of vertices
&ncon, // The number of balancing constraints.
......@@ -75,9 +82,17 @@ IdArray MetisPartition(UnitGraphPtr g, int k, NDArray vwgt_arr, const std::strin
&objval, // the edge-cut or the total communication volume of
// the partitioning solution
part);
if (obj_cut) {
LOG(INFO) << "Partition a graph with " << g->NumVertices(0) << " nodes and "
<< g->NumEdges(0) << " edges into " << k << " parts and get "
<< objval << " edge cuts";
<< g->NumEdges(0) << " edges into " << k << " parts and "
<< "get " << objval << " edge cuts";
} else {
LOG(INFO) << "Partition a graph with " << g->NumVertices(0) << " nodes and "
<< g->NumEdges(0) << " edges into " << k << " parts and "
<< "the communication volume is " << objval;
}
switch (ret) {
case METIS_OK:
return part_arr;
......@@ -105,8 +120,9 @@ DGL_REGISTER_GLOBAL("partition._CAPI_DGLMetisPartition_Hetero")
int k = args[1];
NDArray vwgt = args[2];
std::string mode = args[3];
bool obj_cut = args[4];
#if !defined(_WIN32)
*rv = MetisPartition(ugptr, k, vwgt, mode);
*rv = MetisPartition(ugptr, k, vwgt, mode, obj_cut);
#else
LOG(FATAL) << "Metis partition does not support Windows.";
#endif // !defined(_WIN32)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment