Unverified Commit 5ba2620f authored by yxy235's avatar yxy235 Committed by GitHub
Browse files

[Graphbolt] Support get feature size. (#6374)


Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent f971e25a
...@@ -26,6 +26,16 @@ class Feature: ...@@ -26,6 +26,16 @@ class Feature:
""" """
raise NotImplementedError raise NotImplementedError
def size(self):
    """Get the size of the feature.

    This base implementation is a placeholder that only raises; a concrete
    feature must override it to report its size.

    Returns
    -------
    torch.Size
        The size of the feature.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    # NOTE(review): the concrete implementations shown alongside this one
    # report the shape with the leading dimension dropped -- confirm that is
    # the contract every subclass is expected to follow.
    raise NotImplementedError
def update(self, value: torch.Tensor, ids: torch.Tensor = None): def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature. """Update the feature.
...@@ -77,6 +87,29 @@ class FeatureStore: ...@@ -77,6 +87,29 @@ class FeatureStore:
""" """
raise NotImplementedError raise NotImplementedError
def size(
    self,
    domain: str,
    type_name: str,
    feature_name: str,
):
    """Get the size of the specified feature in the feature store.

    This base implementation is a placeholder that only raises; a concrete
    feature store must override it.

    Parameters
    ----------
    domain : str
        The domain of the feature such as "node", "edge" or "graph".
    type_name : str
        The node or edge type name.
    feature_name : str
        The feature name.

    Returns
    -------
    torch.Size
        The size of the specified feature in the feature store.

    Raises
    ------
    NotImplementedError
        Always, in this base implementation.
    """
    raise NotImplementedError
def update( def update(
self, self,
domain: str, domain: str,
......
...@@ -57,6 +57,28 @@ class BasicFeatureStore(FeatureStore): ...@@ -57,6 +57,28 @@ class BasicFeatureStore(FeatureStore):
""" """
return self._features[(domain, type_name, feature_name)].read(ids) return self._features[(domain, type_name, feature_name)].read(ids)
def size(
    self,
    domain: str,
    type_name: str,
    feature_name: str,
):
    """Get the size of the specified feature in the feature store.

    The feature is looked up in ``self._features`` by the
    ``(domain, type_name, feature_name)`` triple and the result of that
    feature's own ``size()`` is returned unchanged.

    Parameters
    ----------
    domain : str
        The domain of the feature such as "node", "edge" or "graph".
    type_name : str
        The node or edge type name.
    feature_name : str
        The feature name.

    Returns
    -------
    torch.Size
        The size of the specified feature in the feature store.
    """
    # NOTE(review): a missing triple presumably surfaces as a KeyError from
    # the underlying mapping -- confirm against how ``_features`` is built.
    key = (domain, type_name, feature_name)
    return self._features[key].size()
def update( def update(
self, self,
domain: str, domain: str,
......
...@@ -40,6 +40,8 @@ class GPUCachedFeature(Feature): ...@@ -40,6 +40,8 @@ class GPUCachedFeature(Feature):
>>> feature.read(torch.tensor([0, 1]).to("cuda")) >>> feature.read(torch.tensor([0, 1]).to("cuda"))
tensor([[0, 1, 2, 3, 4], tensor([[0, 1, 2, 3, 4],
[1, 1, 1, 1, 1]], device='cuda:0') [1, 1, 1, 1, 1]], device='cuda:0')
>>> feature.size()
torch.Size([5])
""" """
super(GPUCachedFeature, self).__init__() super(GPUCachedFeature, self).__init__()
assert isinstance(fallback_feature, Feature), ( assert isinstance(fallback_feature, Feature), (
...@@ -83,6 +85,16 @@ class GPUCachedFeature(Feature): ...@@ -83,6 +85,16 @@ class GPUCachedFeature(Feature):
self._feature.replace(missing_keys, missing_values) self._feature.replace(missing_keys, missing_values)
return torch.reshape(values, self.item_shape) return torch.reshape(values, self.item_shape)
def size(self):
    """Get the size of the feature.

    The value is not computed here: the call is forwarded to the wrapped
    fallback feature and its result is returned unchanged.

    Returns
    -------
    torch.Size
        The size of the feature.
    """
    return self._fallback_feature.size()
def update(self, value: torch.Tensor, ids: torch.Tensor = None): def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature. """Update the feature.
......
...@@ -41,6 +41,8 @@ class TorchBasedFeature(Feature): ...@@ -41,6 +41,8 @@ class TorchBasedFeature(Feature):
>>> feature.read(torch.tensor([0, 1])) >>> feature.read(torch.tensor([0, 1]))
tensor([[0, 1, 2, 3, 4], tensor([[0, 1, 2, 3, 4],
[1, 1, 1, 1, 1]]) [1, 1, 1, 1, 1]])
>>> feature.size()
torch.Size([5])
>>> import numpy as np >>> import numpy as np
>>> arr = np.array([[1, 2], [3, 4]]) >>> arr = np.array([[1, 2], [3, 4]])
...@@ -85,6 +87,15 @@ class TorchBasedFeature(Feature): ...@@ -85,6 +87,15 @@ class TorchBasedFeature(Feature):
return self._tensor return self._tensor
return self._tensor[ids] return self._tensor[ids]
def size(self):
    """Get the size of the feature.

    The leading dimension of the underlying tensor is dropped, so the
    result is the shape of the remaining dimensions only.

    Returns
    -------
    torch.Size
        The size of the feature.
    """
    # Slice off dimension 0 and keep the rest.
    return self._tensor.size()[1:]
def update(self, value: torch.Tensor, ids: torch.Tensor = None): def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature store. """Update the feature store.
...@@ -100,18 +111,21 @@ class TorchBasedFeature(Feature): ...@@ -100,18 +111,21 @@ class TorchBasedFeature(Feature):
updated. updated.
""" """
if ids is None: if ids is None:
assert self._tensor.shape == value.shape, ( assert self.size() == value.size()[1:], (
f"ids is None, so the entire feature will be updated. " f"ids is None, so the entire feature will be updated. "
f"But the shape of the feature is {self._tensor.shape}, " f"But the size of the feature is {self.size()}, "
f"while the shape of the value is {value.shape}." f"while the size of the value is {value.size()[1:]}."
) )
self._tensor[:] = value self._tensor = value
else: else:
assert ids.shape[0] == value.shape[0], ( assert ids.shape[0] == value.shape[0], (
f"ids and value must have the same length, " f"ids and value must have the same length, "
f"but got {ids.shape[0]} and {value.shape[0]}." f"but got {ids.shape[0]} and {value.shape[0]}."
) )
# [Todo] Check the value feature size matches tesnsor's one. assert self.size() == value.size()[1:], (
f"The size of the feature is {self.size()}, "
f"while the size of the value is {value.size()[1:]}."
)
self._tensor[ids] = value self._tensor[ids] = value
......
...@@ -34,6 +34,10 @@ def test_basic_feature_store_homo(): ...@@ -34,6 +34,10 @@ def test_basic_feature_store_homo():
torch.tensor([[[1, 2], [3, 4]]]), torch.tensor([[[1, 2], [3, 4]]]),
) )
# Test get the size of the entire feature.
assert feature_store.size("node", None, "a") == torch.Size([3])
assert feature_store.size("node", None, "b") == torch.Size([2, 2])
def test_basic_feature_store_hetero(): def test_basic_feature_store_hetero():
a = torch.tensor([[1, 2, 4], [2, 5, 3]]) a = torch.tensor([[1, 2, 4], [2, 5, 3]])
...@@ -41,7 +45,7 @@ def test_basic_feature_store_hetero(): ...@@ -41,7 +45,7 @@ def test_basic_feature_store_hetero():
features = {} features = {}
features[("node", "author", "a")] = gb.TorchBasedFeature(a) features[("node", "author", "a")] = gb.TorchBasedFeature(a)
features[("edge", "paper:cites:paper", "b")] = gb.TorchBasedFeature(b) features[("edge", "paper:cites", "b")] = gb.TorchBasedFeature(b)
feature_store = gb.BasicFeatureStore(features) feature_store = gb.BasicFeatureStore(features)
...@@ -51,7 +55,7 @@ def test_basic_feature_store_hetero(): ...@@ -51,7 +55,7 @@ def test_basic_feature_store_hetero():
torch.tensor([[1, 2, 4], [2, 5, 3]]), torch.tensor([[1, 2, 4], [2, 5, 3]]),
) )
assert torch.equal( assert torch.equal(
feature_store.read("edge", "paper:cites:paper", "b"), feature_store.read("edge", "paper:cites", "b"),
torch.tensor([[[6], [8]], [[8], [9]]]), torch.tensor([[[6], [8]], [[8], [9]]]),
) )
...@@ -61,6 +65,10 @@ def test_basic_feature_store_hetero(): ...@@ -61,6 +65,10 @@ def test_basic_feature_store_hetero():
torch.tensor([[1, 2, 4]]), torch.tensor([[1, 2, 4]]),
) )
# Test get the size of the entire feature.
assert feature_store.size("node", "author", "a") == torch.Size([3])
assert feature_store.size("edge", "paper:cites", "b") == torch.Size([2, 1])
def test_basic_feature_store_errors(): def test_basic_feature_store_errors():
a = torch.tensor([3, 2, 1]) a = torch.tensor([3, 2, 1])
......
...@@ -34,6 +34,10 @@ def test_gpu_cached_feature(): ...@@ -34,6 +34,10 @@ def test_gpu_cached_feature():
), ),
) )
# Test get the size of the entire feature.
assert feat_store_a.size() == torch.Size([3])
assert feat_store_b.size() == torch.Size([2, 2])
# Test update the entire feature. # Test update the entire feature.
feat_store_a.update( feat_store_a.update(
torch.tensor([[0.0, 1.0, 2.0], [3.0, 5.0, 2.0]]).to("cuda") torch.tensor([[0.0, 1.0, 2.0], [3.0, 5.0, 2.0]]).to("cuda")
......
...@@ -35,6 +35,8 @@ def test_torch_based_feature(in_memory): ...@@ -35,6 +35,8 @@ def test_torch_based_feature(in_memory):
assert torch.equal( assert torch.equal(
feature_a.read(), torch.tensor([[1, 2, 3], [4, 5, 6]]) feature_a.read(), torch.tensor([[1, 2, 3], [4, 5, 6]])
) )
# Test read the entire feature.
assert torch.equal( assert torch.equal(
feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]]) feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]])
) )
...@@ -57,6 +59,25 @@ def test_torch_based_feature(in_memory): ...@@ -57,6 +59,25 @@ def test_torch_based_feature(in_memory):
feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[1, 2], [3, 4]]]) feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[1, 2], [3, 4]]])
) )
# Test update the feature.
feature_a.update(torch.tensor([[5, 1, 3]]))
assert torch.equal(
feature_a.read(),
torch.tensor([[5, 1, 3]]),
), print(feature_a.read())
feature_b.update(
torch.tensor([[[1, 3], [5, 7]], [[2, 4], [6, 8]], [[2, 4], [6, 8]]])
)
assert torch.equal(
feature_b.read(),
torch.tensor(
[[[1, 3], [5, 7]], [[2, 4], [6, 8]], [[2, 4], [6, 8]]]
),
)
# Test get the size of the entire feature.
assert feature_a.size() == torch.Size([3])
assert feature_b.size() == torch.Size([2, 2])
with pytest.raises(IndexError): with pytest.raises(IndexError):
feature_a.read(torch.tensor([0, 1, 2, 3])) feature_a.read(torch.tensor([0, 1, 2, 3]))
...@@ -102,6 +123,8 @@ def test_torch_based_feature_store(in_memory): ...@@ -102,6 +123,8 @@ def test_torch_based_feature_store(in_memory):
), ),
] ]
feature_store = gb.TorchBasedFeatureStore(feature_data) feature_store = gb.TorchBasedFeatureStore(feature_data)
# Test read the entire feature.
assert torch.equal( assert torch.equal(
feature_store.read("node", "paper", "a"), feature_store.read("node", "paper", "a"),
torch.tensor([[1, 2, 4], [2, 5, 3]]), torch.tensor([[1, 2, 4], [2, 5, 3]]),
...@@ -111,6 +134,12 @@ def test_torch_based_feature_store(in_memory): ...@@ -111,6 +134,12 @@ def test_torch_based_feature_store(in_memory):
torch.tensor([[[1, 2], [3, 4]], [[2, 5], [3, 4]]]), torch.tensor([[[1, 2], [3, 4]], [[2, 5], [3, 4]]]),
) )
# Test get the size of the entire feature.
assert feature_store.size("node", "paper", "a") == torch.Size([3])
assert feature_store.size(
"edge", "paper:cites:paper", "b"
) == torch.Size([2, 2])
# For windows, the file is locked by the numpy.load. We need to delete # For windows, the file is locked by the numpy.load. We need to delete
# it before closing the temporary directory. # it before closing the temporary directory.
a = b = None a = b = None
...@@ -138,8 +167,12 @@ def test_torch_based_feature_store(in_memory): ...@@ -138,8 +167,12 @@ def test_torch_based_feature_store(in_memory):
), ),
] ]
feature_store = gb.TorchBasedFeatureStore(feature_data) feature_store = gb.TorchBasedFeatureStore(feature_data)
# Test read the entire feature.
assert torch.equal( assert torch.equal(
feature_store.read("node", None, "a"), feature_store.read("node", None, "a"),
torch.tensor([[1, 2, 4], [2, 5, 3]]), torch.tensor([[1, 2, 4], [2, 5, 3]]),
) )
# Test get the size of the entire feature.
assert feature_store.size("node", None, "a") == torch.Size([3])
feature_store = None feature_store = None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment