OpenDAS / dgl / Commits

Commit f50cc3ca (Unverified)
Authored Jul 07, 2023 by Rhett Ying; committed by GitHub on Jul 07, 2023
[GraphBolt] update FeatureStore to use tuple as key (#5965)
parent 90a308f3

Showing 2 changed files with 72 additions and 24 deletions (+72 -24)
python/dgl/graphbolt/feature_store.py                  +28 -17
tests/python/pytorch/graphbolt/test_feature_store.py   +44 -7
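In short: load_feature_stores now keys the returned dict by a (domain, type, name) tuple instead of the bare feature name, and OnDiskFeatureData gains the domain and type fields that feed it. A minimal sketch of the before/after lookup, using names taken from the diff below (the path is a placeholder for illustration):

from dgl import graphbolt as gb

feat_data = [
    gb.OnDiskFeatureData(
        domain="node",              # new field in this commit
        type="paper",               # new field in this commit; may be omitted (None)
        name="feat",
        format="numpy",
        path="/tmp/node_feat.npy",  # placeholder path for illustration
        in_memory=False,
    ),
]
feat_stores = gb.load_feature_stores(feat_data)

# Before this commit: feat_stores["feat"].read()
# After this commit: keys are (domain, type, name) tuples.
node_feat = feat_stores[("node", "paper", "feat")].read()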
python/dgl/graphbolt/feature_store.py  (view file @ f50cc3ca)
"""Feature store for GraphBolt."""
"""Feature store for GraphBolt."""
from
typing
import
List
from
typing
import
List
,
Optional
import
numpy
as
np
import
numpy
as
np
import
pydantic
import
pydantic
...

@@ -142,21 +142,30 @@ class TorchBasedFeatureStore(FeatureStore):
         self._tensor[ids] = value


-# FIXME(Rui): To avoid circular import, we make a copy of `OnDiskDataFormatEnum`
-# from dataset.py. We need to merge the two definitions later.
-class OnDiskDataFormatEnum(pydantic_yaml.YamlStrEnum):
-    """Enum of data format."""
+# [TODO] Move code to 'impl/' and separate OnDisk-related code to another file.
+class FeatureDataFormatEnum(pydantic_yaml.YamlStrEnum):
+    """Enum of feature data format."""

     TORCH = "torch"
     NUMPY = "numpy"


+class FeatureDataDomainEnum(pydantic_yaml.YamlStrEnum):
+    """Enum of feature data domain."""
+
+    NODE = "node"
+    EDGE = "edge"
+    GRAPH = "graph"
+
+
 class OnDiskFeatureData(pydantic.BaseModel):
     r"""The description of an on-disk feature."""
+    domain: FeatureDataDomainEnum
+    type: Optional[str]
     name: str
-    format: OnDiskDataFormatEnum
+    format: FeatureDataFormatEnum
     path: str
-    in_memory: bool = True
+    in_memory: Optional[bool] = True


 def load_feature_stores(feat_data: List[OnDiskFeatureData]):
...

@@ -186,25 +195,27 @@ def load_feature_stores(feat_data: List[OnDiskFeatureData]):
     >>> import torch
     >>> import numpy as np
     >>> from dgl import graphbolt as gb
-    >>> a = torch.tensor([1, 2, 3])
-    >>> b = torch.tensor([[1, 2, 3], [4, 5, 6]])
-    >>> torch.save(a, "/tmp/a.pt")
-    >>> np.save("/tmp/b.npy", b.numpy())
+    >>> edge_label = torch.tensor([1, 2, 3])
+    >>> node_feat = torch.tensor([[1, 2, 3], [4, 5, 6]])
+    >>> torch.save(edge_label, "/tmp/edge_label.pt")
+    >>> np.save("/tmp/node_feat.npy", node_feat.numpy())
     >>> feat_data = [
-    ...     gb.OnDiskFeatureData(name="a", format="torch", path="/tmp/a.pt",
+    ...     gb.OnDiskFeatureData(domain="edge", type="author:writes:paper",
+    ...         name="label", format="torch", path="/tmp/edge_label.pt",
     ...         in_memory=True),
-    ...     gb.OnDiskFeatureData(name="b", format="numpy", path="/tmp/b.npy",
-    ...         in_memory=False),
+    ...     gb.OnDiskFeatureData(domain="node", type="paper", name="feat",
+    ...         format="numpy", path="/tmp/node_feat.npy", in_memory=False),
     ... ]
     >>> gb.load_feature_stores(feat_data)
-    ... {'a': <dgl.graphbolt.feature_store.TorchBasedFeatureStore object at
-    ... 0x7ff093cb4df0>, 'b':
-    ... <dgl.graphbolt.feature_store.TorchBasedFeatureStore object at
-    ... 0x7ff093cb4dc0>}
+    ... {("edge", "author:writes:paper", "label"):
+    ... <dgl.graphbolt.feature_store.TorchBasedFeatureStore object at
+    ... 0x7ff093cb4df0>, ("node", "paper", "feat"):
+    ... <dgl.graphbolt.feature_store.TorchBasedFeatureStore object at
+    ... 0x7ff093cb4dc0>}
     """
     feat_stores = {}
     for spec in feat_data:
-        key = spec.name
+        key = (spec.domain, spec.type, spec.name)
         if spec.format == "torch":
             assert spec.in_memory, (
                 f"Pytorch tensor can only be loaded in memory, "
...
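For callers migrating from the old string keys, the new key can be rebuilt directly from the spec, mirroring the key = (spec.domain, spec.type, spec.name) line above. A small illustrative sketch (the helper names are made up for this sketch and are not part of the commit):

from dgl import graphbolt as gb


def store_key(spec):
    # The same tuple that load_feature_stores uses as its dict key after this commit.
    return (spec.domain, spec.type, spec.name)


def read_feature(feat_stores, spec):
    # Pre-commit code would have done: feat_stores[spec.name].read()
    return feat_stores[store_key(spec)].read()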
tests/python/pytorch/graphbolt/test_feature_store.py  (view file @ f50cc3ca)
...

@@ -2,6 +2,7 @@ import os
 import tempfile

 import numpy as np
+import pydantic
 import pytest
 import torch
 from dgl import graphbolt as gb
...

@@ -59,12 +60,14 @@ def test_torch_based_feature_store(in_memory):
     feat_store_a = feat_store_b = None


-def write_tensor_to_disk(dir, name, t, fmt="pt"):
-    if fmt == "pt":
+def write_tensor_to_disk(dir, name, t, fmt="torch"):
+    if fmt == "torch":
         torch.save(t, os.path.join(dir, name + ".pt"))
-    else:
+    elif fmt == "numpy":
         t = t.numpy()
         np.save(os.path.join(dir, name + ".npy"), t)
+    else:
+        raise ValueError(f"Unsupported format: {fmt}")


 @pytest.mark.parametrize("in_memory", [True, False])
...

@@ -72,16 +75,20 @@ def test_load_feature_stores(in_memory):
     with tempfile.TemporaryDirectory() as test_dir:
         a = torch.tensor([1, 2, 3])
         b = torch.tensor([2, 5, 3])
-        write_tensor_to_disk(test_dir, "a", a, fmt="pt")
-        write_tensor_to_disk(test_dir, "b", b, fmt="npy")
+        write_tensor_to_disk(test_dir, "a", a, fmt="torch")
+        write_tensor_to_disk(test_dir, "b", b, fmt="numpy")
         feat_data = [
             gb.OnDiskFeatureData(
+                domain="node",
+                type="paper",
                 name="a",
                 format="torch",
                 path=os.path.join(test_dir, "a.pt"),
                 in_memory=True,
             ),
             gb.OnDiskFeatureData(
+                domain="edge",
+                type="paper-cites-paper",
                 name="b",
                 format="numpy",
                 path=os.path.join(test_dir, "b.npy"),
...

@@ -89,10 +96,40 @@ def test_load_feature_stores(in_memory):
             ),
         ]
         feat_stores = gb.load_feature_stores(feat_data)
-        assert torch.equal(feat_stores["a"].read(), torch.tensor([1, 2, 3]))
-        assert torch.equal(feat_stores["b"].read(), torch.tensor([2, 5, 3]))
+        assert torch.equal(
+            feat_stores[("node", "paper", "a")].read(), torch.tensor([1, 2, 3])
+        )
+        assert torch.equal(
+            feat_stores[("edge", "paper-cites-paper", "b")].read(),
+            torch.tensor([2, 5, 3]),
+        )
         # For windows, the file is locked by the numpy.load. We need to delete
         # it before closing the temporary directory.
         a = b = None
         feat_stores = None
+
+        # ``domain`` should be enum.
+        with pytest.raises(pydantic.ValidationError):
+            _ = gb.OnDiskFeatureData(
+                domain="invalid",
+                type="paper",
+                name="a",
+                format="torch",
+                path=os.path.join(test_dir, "a.pt"),
+                in_memory=True,
+            )
+
+        # ``type`` could be null.
+        feat_data = [
+            gb.OnDiskFeatureData(
+                domain="node",
+                name="a",
+                format="torch",
+                path=os.path.join(test_dir, "a.pt"),
+                in_memory=True,
+            ),
+        ]
+        feat_stores = gb.load_feature_stores(feat_data)
+        assert ("node", None, "a") in feat_stores
+        feat_stores = None
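Because the keys are now structured tuples, the returned dict can also be sliced by domain or type on the consumer side. A purely illustrative sketch (not part of this commit), assuming feat_stores came from gb.load_feature_stores as in the test above:

# Collect every node-domain store, re-keyed by (type, name).
node_stores = {
    (type_, name): store
    for (domain, type_, name), store in feat_stores.items()
    if domain == "node"
}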