Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
40dcc715
Unverified
Commit
40dcc715
authored
Jun 29, 2023
by
czkkkkkk
Committed by
GitHub
Jun 29, 2023
Browse files
[Graphbolt] Support on-disk feature store. (#5914)
parent
69d9b726
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
86 additions
and
39 deletions
+86
-39
python/dgl/graphbolt/feature_store.py
python/dgl/graphbolt/feature_store.py
+31
-9
tests/python/pytorch/graphbolt/test_feature_store.py
tests/python/pytorch/graphbolt/test_feature_store.py
+55
-30
No files found.
python/dgl/graphbolt/feature_store.py
View file @
40dcc715
...
@@ -49,18 +49,20 @@ class FeatureStore:
...
@@ -49,18 +49,20 @@ class FeatureStore:
raise
NotImplementedError
raise
NotImplementedError
class
InMemory
FeatureStore
(
FeatureStore
):
class
TorchBased
FeatureStore
(
FeatureStore
):
r
"""
In-memory
key-value feature store, where the key
is a
string and
value
r
"""
Torch based
key-value feature store, where the keys
are
string
s
and
is
Pytorch tensor."""
values are
Pytorch tensor
s
."""
def
__init__
(
self
,
feature_dict
:
dict
):
def
__init__
(
self
,
feature_dict
:
dict
):
"""Initialize a
n in-memory
feature store.
"""Initialize a
torch based
feature store.
The feature store is initialized with a dictionary of tensors, where the
The feature store is initialized with a dictionary of tensors, where the
key is the name of a feature and the value is the tensor. The value can
key is the name of a feature and the value is the tensor. The value can
be multi-dimensional, where the first dimension is the index of the
be multi-dimensional, where the first dimension is the index of the
feature.
feature.
Note that the values can be in memory or on disk.
Parameters
Parameters
----------
----------
feature_dict : dict, optional
feature_dict : dict, optional
...
@@ -74,7 +76,7 @@ class InMemoryFeatureStore(FeatureStore):
...
@@ -74,7 +76,7 @@ class InMemoryFeatureStore(FeatureStore):
... "item": torch.arange(0, 6),
... "item": torch.arange(0, 6),
... "rel": torch.arange(0, 6).view(2, 3),
... "rel": torch.arange(0, 6).view(2, 3),
... }
... }
>>> feature_store =
InMemory
FeatureStore(feature_dict)
>>> feature_store =
TorchBased
FeatureStore(feature_dict)
>>> feature_store.read("user", torch.tensor([0, 1, 2]))
>>> feature_store.read("user", torch.tensor([0, 1, 2]))
tensor([0, 1, 2])
tensor([0, 1, 2])
>>> feature_store.read("item", torch.tensor([0, 1, 2]))
>>> feature_store.read("item", torch.tensor([0, 1, 2]))
...
@@ -85,18 +87,35 @@ class InMemoryFeatureStore(FeatureStore):
...
@@ -85,18 +87,35 @@ class InMemoryFeatureStore(FeatureStore):
... torch.ones(3, dtype=torch.long), torch.tensor([0, 1, 2]))
... torch.ones(3, dtype=torch.long), torch.tensor([0, 1, 2]))
>>> feature_store.read("user", torch.tensor([0, 1, 2]))
>>> feature_store.read("user", torch.tensor([0, 1, 2]))
tensor([1, 1, 1])
tensor([1, 1, 1])
>>> import numpy as np
>>> user = np.arange(0, 5)
>>> item = np.arange(0, 6)
>>> np.save("/tmp/user.npy", user)
>>> np.save("/tmp/item.npy", item)
>>> feature_dict = {
... "user": torch.as_tensor(np.load("/tmp/user.npy",
... mmap_mode="r+")),
... "item": torch.as_tensor(np.load("/tmp/item.npy",
... mmap_mode="r+")),
... }
>>> feature_store = TorchBasedFeatureStore(feature_dict)
>>> feature_store.read("user", torch.tensor([0, 1, 2]))
tensor([0, 1, 2])
>>> feature_store.read("item", torch.tensor([3, 4, 2]))
tensor([3, 4, 2])
"""
"""
super
(
InMemory
FeatureStore
,
self
).
__init__
()
super
(
TorchBased
FeatureStore
,
self
).
__init__
()
assert
isinstance
(
feature_dict
,
dict
),
(
assert
isinstance
(
feature_dict
,
dict
),
(
f
"feature_dict in
InMemory
FeatureStore must be dict, "
f
"feature_dict in
TorchBased
FeatureStore must be dict, "
f
"but got
{
type
(
feature_dict
)
}
."
f
"but got
{
type
(
feature_dict
)
}
."
)
)
for
k
,
v
in
feature_dict
.
items
():
for
k
,
v
in
feature_dict
.
items
():
assert
isinstance
(
assert
isinstance
(
k
,
str
k
,
str
),
f
"Key in
InMemory
FeatureStore must be str, but got
{
k
}
."
),
f
"Key in
TorchBased
FeatureStore must be str, but got
{
k
}
."
assert
isinstance
(
v
,
torch
.
Tensor
),
(
assert
isinstance
(
v
,
torch
.
Tensor
),
(
f
"Value in
InMemory
FeatureStore must be torch.Tensor,"
f
"Value in
TorchBased
FeatureStore must be torch.Tensor,"
f
"but got
{
v
}
."
f
"but got
{
v
}
."
)
)
...
@@ -105,6 +124,9 @@ class InMemoryFeatureStore(FeatureStore):
...
@@ -105,6 +124,9 @@ class InMemoryFeatureStore(FeatureStore):
def
read
(
self
,
key
:
str
,
ids
:
torch
.
Tensor
=
None
):
def
read
(
self
,
key
:
str
,
ids
:
torch
.
Tensor
=
None
):
"""Read a feature from the feature store by index.
"""Read a feature from the feature store by index.
The returned feature is always in memory, no matter whether the feature
to read is in memory or on disk.
Parameters
Parameters
----------
----------
key : str
key : str
...
...
tests/python/pytorch/graphbolt/test_feature_store.py
View file @
40dcc715
import
os
import
tempfile
import
numpy
as
np
import
pytest
import
pytest
import
torch
import
torch
from
dgl
import
graphbolt
as
gb
from
dgl
import
graphbolt
as
gb
def
test_in_memory_feature_store
():
def
to_on_disk_tensor
(
test_dir
,
name
,
t
):
a
=
torch
.
tensor
([
1
,
2
,
3
])
path
=
os
.
path
.
join
(
test_dir
,
name
+
".npy"
)
b
=
torch
.
tensor
([
3
,
4
,
5
])
t
=
t
.
numpy
()
c
=
torch
.
tensor
([[
1
,
2
,
3
],
[
4
,
5
,
6
]])
np
.
save
(
path
,
t
)
feature_store
=
gb
.
InMemoryFeatureStore
({
"a"
:
a
,
"b"
:
b
,
"c"
:
c
})
# The Pytorch tensor is a view of the numpy array on disk, which does not
assert
torch
.
equal
(
feature_store
.
read
(
"a"
),
torch
.
tensor
([
1
,
2
,
3
]))
# consume memory.
assert
torch
.
equal
(
feature_store
.
read
(
"b"
),
torch
.
tensor
([
3
,
4
,
5
]))
t
=
torch
.
as_tensor
(
np
.
load
(
path
,
mmap_mode
=
"r+"
))
assert
torch
.
equal
(
return
t
feature_store
.
read
(
"a"
,
torch
.
tensor
([
0
,
2
])),
torch
.
tensor
([
1
,
3
]),
)
@
pytest
.
mark
.
parametrize
(
"in_memory"
,
[
True
,
False
])
assert
torch
.
equal
(
def
test_torch_based_feature_store
(
in_memory
):
feature_store
.
read
(
"a"
,
torch
.
tensor
([
1
,
1
])),
with
tempfile
.
TemporaryDirectory
()
as
test_dir
:
torch
.
tensor
([
2
,
2
]),
a
=
torch
.
tensor
([
1
,
2
,
3
])
)
b
=
torch
.
tensor
([
3
,
4
,
5
])
assert
torch
.
equal
(
c
=
torch
.
tensor
([[
1
,
2
,
3
],
[
4
,
5
,
6
]])
feature_store
.
read
(
"c"
,
torch
.
tensor
([
1
])),
if
not
in_memory
:
torch
.
tensor
([[
4
,
5
,
6
]]),
a
=
to_on_disk_tensor
(
test_dir
,
"a"
,
a
)
)
b
=
to_on_disk_tensor
(
test_dir
,
"b"
,
b
)
feature_store
.
update
(
"a"
,
torch
.
tensor
([
0
,
1
,
2
]))
c
=
to_on_disk_tensor
(
test_dir
,
"c"
,
c
)
assert
torch
.
equal
(
feature_store
.
read
(
"a"
),
torch
.
tensor
([
0
,
1
,
2
]))
assert
torch
.
equal
(
feature_store
=
gb
.
TorchBasedFeatureStore
({
"a"
:
a
,
"b"
:
b
,
"c"
:
c
})
feature_store
.
read
(
"a"
,
torch
.
tensor
([
0
,
2
])),
assert
torch
.
equal
(
feature_store
.
read
(
"a"
),
torch
.
tensor
([
1
,
2
,
3
]))
torch
.
tensor
([
0
,
2
]),
assert
torch
.
equal
(
feature_store
.
read
(
"b"
),
torch
.
tensor
([
3
,
4
,
5
]))
)
assert
torch
.
equal
(
with
pytest
.
raises
(
AssertionError
):
feature_store
.
read
(
"a"
,
torch
.
tensor
([
0
,
2
])),
feature_store
.
read
(
"d"
)
torch
.
tensor
([
1
,
3
]),
)
with
pytest
.
raises
(
IndexError
):
assert
torch
.
equal
(
feature_store
.
read
(
"a"
,
torch
.
tensor
([
0
,
1
,
2
,
3
]))
feature_store
.
read
(
"a"
,
torch
.
tensor
([
1
,
1
])),
torch
.
tensor
([
2
,
2
]),
)
assert
torch
.
equal
(
feature_store
.
read
(
"c"
,
torch
.
tensor
([
1
])),
torch
.
tensor
([[
4
,
5
,
6
]]),
)
feature_store
.
update
(
"a"
,
torch
.
tensor
([
0
,
1
,
2
]))
assert
torch
.
equal
(
feature_store
.
read
(
"a"
),
torch
.
tensor
([
0
,
1
,
2
]))
assert
torch
.
equal
(
feature_store
.
read
(
"a"
,
torch
.
tensor
([
0
,
2
])),
torch
.
tensor
([
0
,
2
]),
)
with
pytest
.
raises
(
AssertionError
):
feature_store
.
read
(
"d"
)
with
pytest
.
raises
(
IndexError
):
feature_store
.
read
(
"a"
,
torch
.
tensor
([
0
,
1
,
2
,
3
]))
# On Windows, the file stays locked by numpy.load while the memory-mapped
# tensors are alive, so drop all references before the temporary directory is closed.
a
=
b
=
c
=
feature_store
=
None
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment