Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
f758c7c1
Unverified
Commit
f758c7c1
authored
Dec 30, 2023
by
Rhett Ying
Committed by
GitHub
Dec 30, 2023
Browse files
[DistDGL] fix distributed partition issue (#6847)
parent
93a58343
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
120 additions
and
31 deletions
+120
-31
tests/scripts/task_distributed_test.sh
tests/scripts/task_distributed_test.sh
+1
-1
tests/tools/test_change_etype_to_canonical_etype.py
tests/tools/test_change_etype_to_canonical_etype.py
+3
-0
tests/tools/test_convert_partition.py
tests/tools/test_convert_partition.py
+97
-0
tools/distpartitioning/convert_partition.py
tools/distpartitioning/convert_partition.py
+19
-30
No files found.
tests/scripts/task_distributed_test.sh
View file @
f758c7c1
...
@@ -36,4 +36,4 @@ export DMLC_LOG_DEBUG=1
...
@@ -36,4 +36,4 @@ export DMLC_LOG_DEBUG=1
python3
-m
pytest
-v
--capture
=
tee-sys
--junitxml
=
pytest_distributed.xml
--durations
=
100 tests/distributed/
*
.py
||
fail
"distributed"
python3
-m
pytest
-v
--capture
=
tee-sys
--junitxml
=
pytest_distributed.xml
--durations
=
100 tests/distributed/
*
.py
||
fail
"distributed"
#
PYTHONPATH=tools:tools/distpartitioning:$PYTHONPATH python3 -m pytest -v --capture=tee-sys --junitxml=pytest_tools.xml --durations=100 tests/tools/*.py || fail "tools"
PYTHONPATH
=
tools:tools/distpartitioning:
$PYTHONPATH
python3
-m
pytest
-v
--capture
=
tee-sys
--junitxml
=
pytest_tools.xml
--durations
=
100 tests/tools/
*
.py
||
fail
"tools"
tests/tools/test_change_etype_to_canonical_etype.py
View file @
f758c7c1
import
json
import
json
import
os
import
os
import
tempfile
import
tempfile
import
unittest
from
collections
import
Counter
from
collections
import
Counter
import
dgl
import
dgl
...
@@ -37,6 +38,7 @@ def create_random_hetero(type_n, node_n):
...
@@ -37,6 +38,7 @@ def create_random_hetero(type_n, node_n):
]
]
@
unittest
.
skip
(
reason
=
"Skip due to glitch in CI"
)
@
pytest
.
mark
.
parametrize
(
@
pytest
.
mark
.
parametrize
(
"type_n, node_n, num_parts"
,
[[
3
,
100
,
2
],
[
10
,
500
,
4
],
[
10
,
1000
,
8
]]
"type_n, node_n, num_parts"
,
[[
3
,
100
,
2
],
[
10
,
500
,
4
],
[
10
,
1000
,
8
]]
)
)
...
@@ -45,6 +47,7 @@ def test_hetero_graph(type_n, node_n, num_parts):
...
@@ -45,6 +47,7 @@ def test_hetero_graph(type_n, node_n, num_parts):
do_convert_and_check
(
g
,
"convert_conf_test"
,
num_parts
,
expected_c_etypes
)
do_convert_and_check
(
g
,
"convert_conf_test"
,
num_parts
,
expected_c_etypes
)
@
unittest
.
skip
(
reason
=
"Skip due to glitch in CI"
)
@
pytest
.
mark
.
parametrize
(
"node_n, num_parts"
,
[[
100
,
2
],
[
500
,
4
]])
@
pytest
.
mark
.
parametrize
(
"node_n, num_parts"
,
[[
100
,
2
],
[
500
,
4
]])
def
test_homo_graph
(
node_n
,
num_parts
):
def
test_homo_graph
(
node_n
,
num_parts
):
g
=
dgl
.
rand_graph
(
node_n
,
node_n
//
10
)
g
=
dgl
.
rand_graph
(
node_n
,
node_n
//
10
)
...
...
tests/tools/test_convert_partition.py
View file @
f758c7c1
...
@@ -152,3 +152,100 @@ def test_get_unique_invidx(num_nodes, num_edges, nid_begin, nid_end):
...
@@ -152,3 +152,100 @@ def test_get_unique_invidx(num_nodes, num_edges, nid_begin, nid_end):
assert
len
(
uniques
)
>
max_dst
,
f
"Inverse idx, dst_ids, invalid max value."
assert
len
(
uniques
)
>
max_dst
,
f
"Inverse idx, dst_ids, invalid max value."
assert
max_dst
>=
0
,
f
"Inverse idx, dst_ids has negative values."
assert
max_dst
>=
0
,
f
"Inverse idx, dst_ids has negative values."
def
test_get_unique_invidx_low_mem
():
srcids
=
np
.
array
([
14
,
0
,
3
,
3
,
0
,
3
,
9
,
5
,
14
,
12
])
dstids
=
np
.
array
([
10
,
16
,
12
,
13
,
10
,
17
,
16
,
13
,
14
,
16
])
unique_nids
=
np
.
array
([
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
])
uniques
,
idxes
,
srcids
,
dstids
=
_get_unique_invidx
(
srcids
,
dstids
,
unique_nids
,
low_mem
=
True
,
)
expected_unqiues
=
np
.
array
(
[
0
,
3
,
5
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
]
)
expected_idxes
=
np
.
array
(
[
1
,
2
,
7
,
6
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
]
)
expected_srcids
=
np
.
array
([
8
,
0
,
1
,
1
,
0
,
1
,
3
,
2
,
8
,
6
])
expected_dstids
=
np
.
array
([
4
,
10
,
6
,
7
,
4
,
11
,
10
,
7
,
8
,
10
])
assert
np
.
all
(
uniques
==
expected_unqiues
),
f
"unique is not expected.
{
uniques
}
!=
{
expected_unqiues
}
"
assert
np
.
all
(
idxes
==
expected_idxes
),
f
"indices is not expected.
{
idxes
}
!=
{
expected_idxes
}
"
assert
np
.
all
(
srcids
==
expected_srcids
),
f
"srcids is not expected.
{
srcids
}
!=
{
expected_srcids
}
"
assert
np
.
all
(
dstids
==
expected_dstids
),
f
"dstdis is not expected.
{
dstids
}
!=
{
expected_dstids
}
"
def
test_get_unique_invidx_high_mem
():
srcids
=
np
.
array
([
14
,
0
,
3
,
3
,
0
,
3
,
9
,
5
,
14
,
12
])
dstids
=
np
.
array
([
10
,
16
,
12
,
13
,
10
,
17
,
16
,
13
,
14
,
16
])
unique_nids
=
np
.
array
([
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
])
uniques
,
idxes
,
srcids
,
dstids
=
_get_unique_invidx
(
srcids
,
dstids
,
unique_nids
,
low_mem
=
False
,
)
expected_unqiues
=
np
.
array
(
[
0
,
3
,
5
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
]
)
expected_idxes
=
np
.
array
(
[
1
,
2
,
7
,
6
,
10
,
21
,
9
,
13
,
0
,
25
,
11
,
15
,
28
,
29
]
)
expected_srcids
=
np
.
array
([
8
,
0
,
1
,
1
,
0
,
1
,
3
,
2
,
8
,
6
])
expected_dstids
=
np
.
array
([
4
,
10
,
6
,
7
,
4
,
11
,
10
,
7
,
8
,
10
])
assert
np
.
all
(
uniques
==
expected_unqiues
),
f
"unique is not expected.
{
uniques
}
!=
{
expected_unqiues
}
"
assert
np
.
all
(
idxes
==
expected_idxes
),
f
"indices is not expected.
{
idxes
}
!=
{
expected_idxes
}
"
assert
np
.
all
(
srcids
==
expected_srcids
),
f
"srcids is not expected.
{
srcids
}
!=
{
expected_srcids
}
"
assert
np
.
all
(
dstids
==
expected_dstids
),
f
"dstdis is not expected.
{
dstids
}
!=
{
expected_dstids
}
"
def
test_get_unique_invidx_low_high_mem
():
srcids
=
np
.
array
([
14
,
0
,
3
,
3
,
0
,
3
,
9
,
5
,
14
,
12
])
dstids
=
np
.
array
([
10
,
16
,
12
,
13
,
10
,
17
,
16
,
13
,
14
,
16
])
unique_nids
=
np
.
array
([
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
])
uniques_low
,
idxes_low
,
srcids_low
,
dstids_low
=
_get_unique_invidx
(
srcids
,
dstids
,
unique_nids
,
low_mem
=
True
,
)
uniques_high
,
idxes_high
,
srcids_high
,
dstids_high
=
_get_unique_invidx
(
srcids
,
dstids
,
unique_nids
,
low_mem
=
False
,
)
assert
np
.
all
(
uniques_low
==
uniques_high
),
f
"unique is not expected.
{
uniques_low
}
!=
{
uniques_high
}
"
assert
not
np
.
all
(
idxes_low
==
idxes_high
),
f
"indices is not expected.
{
idxes_low
}
==
{
idxes_high
}
"
assert
np
.
all
(
srcids_low
==
srcids_high
),
f
"srcids is not expected.
{
srcids_low
}
!=
{
srcids_high
}
"
assert
np
.
all
(
dstids_low
==
dstids_high
),
f
"dstdis is not expected.
{
dstids_low
}
!=
{
dstids_high
}
"
tools/distpartitioning/convert_partition.py
View file @
f758c7c1
...
@@ -21,7 +21,7 @@ from pyarrow import csv
...
@@ -21,7 +21,7 @@ from pyarrow import csv
from
utils
import
get_idranges
,
memory_snapshot
,
read_json
from
utils
import
get_idranges
,
memory_snapshot
,
read_json
def
_get_unique_invidx
(
srcids
,
dstids
,
nids
):
def
_get_unique_invidx
(
srcids
,
dstids
,
nids
,
low_mem
=
True
):
"""This function is used to compute a list of unique elements,
"""This function is used to compute a list of unique elements,
and their indices in the input list, which is the concatenation
and their indices in the input list, which is the concatenation
of srcids, dstids and uniq_nids. In addition, this function will also
of srcids, dstids and uniq_nids. In addition, this function will also
...
@@ -34,6 +34,14 @@ def _get_unique_invidx(srcids, dstids, nids):
...
@@ -34,6 +34,14 @@ def _get_unique_invidx(srcids, dstids, nids):
550GB of systems memory, which is limiting the capability of the
550GB of systems memory, which is limiting the capability of the
partitioning pipeline.
partitioning pipeline.
Note: This function is a workaround solution for the high memory requirement
of numpy's unique function call. This function is not a general purpose
function and is only used in the context of the partitioning pipeline.
What's more, this function does not behave exactly the same as numpy's
unique function call. Namely, this function does not return the exact same
inverse indices as numpy's unique function call. However, for the current
use case, this function is sufficient.
Current numpy uniques function returns 3 return parameters, which are
Current numpy uniques function returns 3 return parameters, which are
. list of unique elements
. list of unique elements
. list of indices, in the input argument list, which are first
. list of indices, in the input argument list, which are first
...
@@ -62,6 +70,11 @@ def _get_unique_invidx(srcids, dstids, nids):
...
@@ -62,6 +70,11 @@ def _get_unique_invidx(srcids, dstids, nids):
list of dstids. Current implementation of the pipeline guarantees
list of dstids. Current implementation of the pipeline guarantees
this assumption and is used to simplify the current implementation
this assumption and is used to simplify the current implementation
of the workaround solution.
of the workaround solution.
low_mem : bool, optional
Indicates whether to use the low memory version of the function. If
``False``, the function will use numpy's native ``unique`` function.
Otherwise, the function will use the low memory version of the
function.
Returns:
Returns:
--------
--------
...
@@ -84,12 +97,11 @@ def _get_unique_invidx(srcids, dstids, nids):
...
@@ -84,12 +97,11 @@ def _get_unique_invidx(srcids, dstids, nids):
),
f
"Please provide the correct input parameters"
),
f
"Please provide the correct input parameters"
assert
len
(
srcids
)
!=
0
,
f
"Please provide a non-empty edge-list."
assert
len
(
srcids
)
!=
0
,
f
"Please provide a non-empty edge-list."
if
n
p
.
__version__
<
"1.24.0"
:
if
n
ot
low_mem
:
logging
.
warning
(
logging
.
warning
(
f
"Numpy version,
{
np
.
__version__
}
, is lower than expected."
"Calling numpy's native function unique. This functions memory "
f
"Falling back to numpy's native function unique."
"overhead will limit size of the partitioned graph objects "
f
"This functions memory overhead will limit size of the "
"processed by each node in the cluster."
f
"partitioned graph objects processed by each node in the cluster."
)
)
uniques
,
idxes
,
inv_idxes
=
np
.
unique
(
uniques
,
idxes
,
inv_idxes
=
np
.
unique
(
np
.
concatenate
([
srcids
,
dstids
,
nids
]),
np
.
concatenate
([
srcids
,
dstids
,
nids
]),
...
@@ -128,30 +140,7 @@ def _get_unique_invidx(srcids, dstids, nids):
...
@@ -128,30 +140,7 @@ def _get_unique_invidx(srcids, dstids, nids):
# uniques and idxes are built
# uniques and idxes are built
assert
len
(
uniques
)
==
len
(
idxes
),
f
"Error building the idxes array."
assert
len
(
uniques
)
==
len
(
idxes
),
f
"Error building the idxes array."
# build inverse idxes for srcids, dstids and nids
srcids
=
np
.
searchsorted
(
uniques
,
srcids
,
side
=
"left"
)
# over-write the srcids and dstids arrays.
sort_ids
=
np
.
argsort
(
srcids
)
srcids
=
srcids
[
sort_ids
]
# TODO: check if wrapping this while loop in a c++ wrapper
# helps in speeding up the code
idx1
=
0
idx2
=
0
while
(
idx1
<
len
(
srcids
))
and
(
idx2
<
len
(
uniques
)):
if
srcids
[
idx1
]
==
uniques
[
idx2
]:
srcids
[
idx1
]
=
idx2
idx1
+=
1
elif
srcids
[
idx1
]
<
uniques
[
idx2
]:
idx1
+=
1
else
:
idx2
+=
1
assert
idx1
>=
len
(
srcids
),
(
f
"Failed to locate all srcids in the uniques array "
f
" len(srcids) =
{
len
(
srcids
)
}
, idx1 =
{
idx1
}
"
f
" len(uniques) =
{
len
(
uniques
)
}
, idx2 =
{
idx2
}
"
)
srcids
[
sort_ids
]
=
srcids
# process dstids now.
# process dstids now.
# dstids is guaranteed to be a subset of the `nids` list
# dstids is guaranteed to be a subset of the `nids` list
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment