Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
5f327ff4
Unverified
Commit
5f327ff4
authored
Oct 17, 2023
by
LastWhisper
Committed by
GitHub
Oct 17, 2023
Browse files
[GraphBolt] Ensure the contiguity of the `TorchBasedFeature._tensor`. (#6449)
parent
e4bb6abc
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
139 additions
and
5 deletions
+139
-5
python/dgl/graphbolt/impl/torch_based_feature_store.py
python/dgl/graphbolt/impl/torch_based_feature_store.py
+2
-1
python/dgl/graphbolt/utils/internal.py
python/dgl/graphbolt/utils/internal.py
+16
-4
tests/python/pytorch/graphbolt/impl/test_torch_based_feature_store.py
.../pytorch/graphbolt/impl/test_torch_based_feature_store.py
+38
-0
tests/python/pytorch/graphbolt/utils/test_internal.py
tests/python/pytorch/graphbolt/utils/test_internal.py
+83
-0
No files found.
python/dgl/graphbolt/impl/torch_based_feature_store.py
View file @
5f327ff4
...
...
@@ -69,7 +69,8 @@ class TorchBasedFeature(Feature):
f
"dimension of torch_feature in TorchBasedFeature must be greater "
f
"than 1, but got
{
torch_feature
.
dim
()
}
dimension."
)
self
.
_tensor
=
torch_feature
# Make sure the tensor is contiguous.
self
.
_tensor
=
torch_feature
.
contiguous
()
def
read
(
self
,
ids
:
torch
.
Tensor
=
None
):
"""Read the feature by index.
...
...
python/dgl/graphbolt/utils/internal.py
View file @
5f327ff4
...
...
@@ -35,13 +35,25 @@ def save_data(data, path, fmt):
raise
RuntimeError
(
f
"Unsupported format:
{
fmt
}
"
)
# Perform necessary conversion.
if
fmt
==
"numpy"
and
isinstance
(
element
,
torch
.
Tensor
):
element
=
element
.
cpu
().
numpy
()
elif
fmt
==
"torch"
and
isinstance
(
element
,
np
.
ndarray
):
element
=
torch
.
from_numpy
(
element
).
cpu
()
if
fmt
==
"numpy"
and
isinstance
(
data
,
torch
.
Tensor
):
data
=
data
.
cpu
().
numpy
()
elif
fmt
==
"torch"
and
isinstance
(
data
,
np
.
ndarray
):
data
=
torch
.
from_numpy
(
data
).
cpu
()
# Save the data.
if
fmt
==
"numpy"
:
if
not
data
.
flags
[
"C_CONTIGUOUS"
]:
Warning
(
"The ndarray saved to disk is not contiguous, "
"so it will be copied to contiguous memory."
)
data
=
np
.
ascontiguousarray
(
data
)
np
.
save
(
path
,
data
)
elif
fmt
==
"torch"
:
if
not
data
.
is_contiguous
():
Warning
(
"The tensor saved to disk is not contiguous, "
"so it will be copied to contiguous memory."
)
data
=
data
.
contiguous
()
torch
.
save
(
data
,
path
)
tests/python/pytorch/graphbolt/impl/test_torch_based_feature_store.py
View file @
5f327ff4
...
...
@@ -86,6 +86,44 @@ def test_torch_based_feature(in_memory):
a
=
b
=
None
feature_a
=
feature_b
=
None
# Test loaded tensors' contiguity from C/Fortran contiguous ndarray.
contiguous_numpy
=
np
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]],
order
=
"C"
)
non_contiguous_numpy
=
np
.
array
([[
1
,
2
,
3
],
[
4
,
5
,
6
]],
order
=
"F"
)
assert
contiguous_numpy
.
flags
[
"C_CONTIGUOUS"
]
assert
non_contiguous_numpy
.
flags
[
"F_CONTIGUOUS"
]
np
.
save
(
os
.
path
.
join
(
test_dir
,
"contiguous_numpy.npy"
),
contiguous_numpy
)
np
.
save
(
os
.
path
.
join
(
test_dir
,
"non_contiguous_numpy.npy"
),
non_contiguous_numpy
,
)
cur_mmap_mode
=
None
if
not
in_memory
:
cur_mmap_mode
=
"r+"
feature_a
=
gb
.
TorchBasedFeature
(
torch
.
from_numpy
(
np
.
load
(
os
.
path
.
join
(
test_dir
,
"contiguous_numpy.npy"
),
mmap_mode
=
cur_mmap_mode
,
)
)
)
feature_b
=
gb
.
TorchBasedFeature
(
torch
.
from_numpy
(
np
.
load
(
os
.
path
.
join
(
test_dir
,
"non_contiguous_numpy.npy"
),
mmap_mode
=
cur_mmap_mode
,
)
)
)
assert
feature_a
.
_tensor
.
is_contiguous
()
assert
feature_b
.
_tensor
.
is_contiguous
()
contiguous_numpy
=
non_contiguous_numpy
=
None
feature_a
=
feature_b
=
None
def
write_tensor_to_disk
(
dir
,
name
,
t
,
fmt
=
"torch"
):
if
fmt
==
"torch"
:
...
...
tests/python/pytorch/graphbolt/utils/test_internal.py
0 → 100644
View file @
5f327ff4
import
os
import
tempfile
import
dgl.graphbolt.utils
as
utils
import
numpy
as
np
import
pytest
import
torch
def test_read_torch_data():
    """Round-trip a tensor through ``torch.save`` and ``_read_torch_data``."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        tensor_path = os.path.join(tmp_dir, "save_tensor.pt")
        expected = torch.tensor([[1, 2, 4], [2, 5, 3]])
        torch.save(expected, tensor_path)

        actual = utils.internal._read_torch_data(tensor_path)
        assert torch.equal(expected, actual)

        # Drop both references before the temporary directory is removed.
        expected = None
        actual = None
@pytest.mark.parametrize("in_memory", [True, False])
def test_read_numpy_data(in_memory):
    """Save an ndarray with ``np.save`` and read it via ``_read_numpy_data``.

    Parameters
    ----------
    in_memory : bool
        Forwarded unchanged as the second argument of ``_read_numpy_data``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        array_path = os.path.join(tmp_dir, "save_numpy.npy")
        source_array = np.array([[1, 2, 4], [2, 5, 3]])
        np.save(array_path, source_array)

        loaded = utils.internal._read_numpy_data(array_path, in_memory)
        expected = torch.from_numpy(source_array)
        assert torch.equal(expected, loaded)

        # Drop the references before the temporary directory is removed.
        expected = None
        source_array = None
        loaded = None
@pytest.mark.parametrize("fmt", ["torch", "numpy"])
def test_read_data(fmt):
    """Check that ``utils.read_data`` returns the stored values for ``fmt``.

    Parameters
    ----------
    fmt : str
        Either ``"torch"`` or ``"numpy"``; selects how the fixture file is
        written and is passed through to ``read_data``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        source_array = np.array([[1, 2, 4], [2, 5, 3]])
        expected = torch.from_numpy(source_array)

        extension = "npy" if fmt != "torch" else "pt"
        target = os.path.join(tmp_dir, f"save_data.{extension}")

        # Write the fixture with the library that matches the format.
        if fmt == "torch":
            torch.save(expected, target)
        elif fmt == "numpy":
            np.save(target, source_array)

        loaded = utils.read_data(target, fmt)
        assert torch.equal(expected, loaded)
@pytest.mark.parametrize(
    "data_fmt, save_fmt, contiguous",
    [
        (data_fmt, save_fmt, contiguous)
        for data_fmt in ("torch", "numpy")
        for save_fmt in ("torch", "numpy")
        for contiguous in (True, False)
    ],
)
def test_save_data(data_fmt, save_fmt, contiguous):
    """Check that ``utils.save_data`` writes contiguous, value-equal data.

    Parameters
    ----------
    data_fmt : str
        ``"torch"`` to hand ``save_data`` a tensor, ``"numpy"`` for an ndarray.
    save_fmt : str
        On-disk format passed to ``save_data``; also selects the loader used
        to read the file back.
    contiguous : bool
        When False, the input is converted to Fortran order before saving.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        array = np.array([[1, 2, 4], [2, 5, 3]])
        if not contiguous:
            array = np.asfortranarray(array)
        tensor = torch.from_numpy(array)

        extension = "npy" if save_fmt != "torch" else "pt"
        target = os.path.join(tmp_dir, f"save_data.{extension}")

        # Step 1: save the input in the representation named by `data_fmt`.
        inputs = {"torch": tensor, "numpy": array}
        if data_fmt in inputs:
            utils.save_data(inputs[data_fmt], target, save_fmt)

        # Step 2: reload with the matching library and verify the result.
        restored = None
        if save_fmt == "numpy":
            restored = np.load(target)
            # The stored ndarray must be C-contiguous.
            assert restored.flags["C_CONTIGUOUS"]
            assert np.array_equal(tensor.numpy(), restored)
        elif save_fmt == "torch":
            restored = torch.load(target)
            assert restored.is_contiguous()
            assert torch.equal(tensor, restored)

        # Drop every reference before the temporary directory is removed.
        inputs = None
        array = None
        tensor = None
        restored = None
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment