Unverified Commit 5ba2620f authored by yxy235, committed by GitHub

[Graphbolt] Support get feature size. (#6374)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent f971e25a
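The commit adds a size() accessor to the Feature and FeatureStore interfaces and to their Torch-based and GPU-cached implementations. A minimal sketch of the resulting call, based on the docstring examples in the hunks below and assuming graphbolt is imported as dgl.graphbolt:

import torch
from dgl import graphbolt as gb

# Three items, each a length-5 vector. size() reports the per-item shape,
# i.e. the tensor shape with the leading item dimension dropped.
feature = gb.TorchBasedFeature(torch.arange(15).reshape(3, 5))
print(feature.size())  # expected: torch.Size([5])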
......@@ -26,6 +26,16 @@ class Feature:
"""
raise NotImplementedError
def size(self):
"""Get the size of the feature.
Returns
-------
torch.Size
The size of the feature.
"""
raise NotImplementedError
def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature.
......@@ -77,6 +87,29 @@ class FeatureStore:
"""
raise NotImplementedError
def size(
self,
domain: str,
type_name: str,
feature_name: str,
):
"""Get the size of the specified feature in the feature store.
Parameters
----------
domain : str
The domain of the feature such as "node", "edge" or "graph".
type_name : str
The node or edge type name.
feature_name : str
The feature name.
Returns
-------
torch.Size
The size of the specified feature in the feature store.
"""
raise NotImplementedError
def update(
self,
domain: str,
......
......@@ -57,6 +57,28 @@ class BasicFeatureStore(FeatureStore):
"""
return self._features[(domain, type_name, feature_name)].read(ids)
def size(
self,
domain: str,
type_name: str,
feature_name: str,
):
"""Get the size of the specified feature in the feature store.
Parameters
----------
domain : str
The domain of the feature such as "node", "edge" or "graph".
type_name : str
The node or edge type name.
feature_name : str
The feature name.
Returns
-------
torch.Size
The size of the specified feature in the feature store.
"""
return self._features[(domain, type_name, feature_name)].size()
def update(
self,
domain: str,
......
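With BasicFeatureStore delegating to the wrapped Feature, callers query a feature's size with the same (domain, type_name, feature_name) triple used by read() and update(). A small sketch mirroring the tests further down, with hypothetical data:

import torch
from dgl import graphbolt as gb

features = {
    ("node", "author", "a"): gb.TorchBasedFeature(
        torch.tensor([[1, 2, 4], [2, 5, 3]])
    ),
}
feature_store = gb.BasicFeatureStore(features)

# size() forwards to the underlying Feature keyed by the same triple.
print(feature_store.size("node", "author", "a"))  # expected: torch.Size([3])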
......@@ -40,6 +40,8 @@ class GPUCachedFeature(Feature):
>>> feature.read(torch.tensor([0, 1]).to("cuda"))
tensor([[0, 1, 2, 3, 4],
[1, 1, 1, 1, 1]], device='cuda:0')
>>> feature.size()
torch.Size([5])
"""
super(GPUCachedFeature, self).__init__()
assert isinstance(fallback_feature, Feature), (
......@@ -83,6 +85,16 @@ class GPUCachedFeature(Feature):
self._feature.replace(missing_keys, missing_values)
return torch.reshape(values, self.item_shape)
def size(self):
"""Get the size of the feature.
Returns
-------
torch.Size
The size of the feature.
"""
return self._fallback_feature.size()
def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature.
......
......@@ -41,6 +41,8 @@ class TorchBasedFeature(Feature):
>>> feature.read(torch.tensor([0, 1]))
tensor([[0, 1, 2, 3, 4],
[1, 1, 1, 1, 1]])
>>> feature.size()
torch.Size([5])
>>> import numpy as np
>>> arr = np.array([[1, 2], [3, 4]])
......@@ -85,6 +87,15 @@ class TorchBasedFeature(Feature):
return self._tensor
return self._tensor[ids]
def size(self):
"""Get the size of the feature.
Returns
-------
torch.Size
The size of the feature.
"""
return self._tensor.size()[1:]
def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature store.
......@@ -100,18 +111,21 @@ class TorchBasedFeature(Feature):
updated.
"""
if ids is None:
- assert self._tensor.shape == value.shape, (
+ assert self.size() == value.size()[1:], (
f"ids is None, so the entire feature will be updated. "
- f"But the shape of the feature is {self._tensor.shape}, "
- f"while the shape of the value is {value.shape}."
+ f"But the size of the feature is {self.size()}, "
+ f"while the size of the value is {value.size()[1:]}."
)
- self._tensor[:] = value
+ self._tensor = value
else:
assert ids.shape[0] == value.shape[0], (
f"ids and value must have the same length, "
f"but got {ids.shape[0]} and {value.shape[0]}."
)
- # [Todo] Check the value feature size matches tesnsor's one.
+ assert self.size() == value.size()[1:], (
+ f"The size of the feature is {self.size()}, "
+ f"while the size of the value is {value.size()[1:]}."
+ )
self._tensor[ids] = value
......
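Two details of the TorchBasedFeature change are worth noting: size() returns self._tensor.size()[1:], i.e. the shape of a single item with the leading ID dimension stripped, and update() now checks that the incoming value matches that per-item size rather than the full tensor shape. A short illustration under the same assumptions as above:

import torch
from dgl import graphbolt as gb

feature = gb.TorchBasedFeature(torch.zeros(4, 2, 3))
print(feature.size())  # torch.Size([2, 3]): per-item shape, not the item count

# A full update may change the number of items, but not the per-item size.
feature.update(torch.ones(7, 2, 3))   # passes: per-item size is still (2, 3)
# feature.update(torch.ones(7, 3))    # would trip the new size assertion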
......@@ -34,6 +34,10 @@ def test_basic_feature_store_homo():
torch.tensor([[[1, 2], [3, 4]]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", None, "a") == torch.Size([3])
assert feature_store.size("node", None, "b") == torch.Size([2, 2])
def test_basic_feature_store_hetero():
a = torch.tensor([[1, 2, 4], [2, 5, 3]])
......@@ -41,7 +45,7 @@ def test_basic_feature_store_hetero():
features = {}
features[("node", "author", "a")] = gb.TorchBasedFeature(a)
features[("edge", "paper:cites:paper", "b")] = gb.TorchBasedFeature(b)
features[("edge", "paper:cites", "b")] = gb.TorchBasedFeature(b)
feature_store = gb.BasicFeatureStore(features)
......@@ -51,7 +55,7 @@ def test_basic_feature_store_hetero():
torch.tensor([[1, 2, 4], [2, 5, 3]]),
)
assert torch.equal(
feature_store.read("edge", "paper:cites:paper", "b"),
feature_store.read("edge", "paper:cites", "b"),
torch.tensor([[[6], [8]], [[8], [9]]]),
)
......@@ -61,6 +65,10 @@ def test_basic_feature_store_hetero():
torch.tensor([[1, 2, 4]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", "author", "a") == torch.Size([3])
assert feature_store.size("edge", "paper:cites", "b") == torch.Size([2, 1])
def test_basic_feature_store_errors():
a = torch.tensor([3, 2, 1])
......
......@@ -34,6 +34,10 @@ def test_gpu_cached_feature():
),
)
# Test get the size of the entire feature.
assert feat_store_a.size() == torch.Size([3])
assert feat_store_b.size() == torch.Size([2, 2])
# Test update the entire feature.
feat_store_a.update(
torch.tensor([[0.0, 1.0, 2.0], [3.0, 5.0, 2.0]]).to("cuda")
......
......@@ -35,6 +35,8 @@ def test_torch_based_feature(in_memory):
assert torch.equal(
feature_a.read(), torch.tensor([[1, 2, 3], [4, 5, 6]])
)
# Test read the entire feature.
assert torch.equal(
feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]])
)
......@@ -57,6 +59,25 @@ def test_torch_based_feature(in_memory):
feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[1, 2], [3, 4]]])
)
# Test update the feature.
feature_a.update(torch.tensor([[5, 1, 3]]))
assert torch.equal(
feature_a.read(),
torch.tensor([[5, 1, 3]]),
)
feature_b.update(
torch.tensor([[[1, 3], [5, 7]], [[2, 4], [6, 8]], [[2, 4], [6, 8]]])
)
assert torch.equal(
feature_b.read(),
torch.tensor(
[[[1, 3], [5, 7]], [[2, 4], [6, 8]], [[2, 4], [6, 8]]]
),
)
# Test get the size of the entire feature.
assert feature_a.size() == torch.Size([3])
assert feature_b.size() == torch.Size([2, 2])
with pytest.raises(IndexError):
feature_a.read(torch.tensor([0, 1, 2, 3]))
......@@ -102,6 +123,8 @@ def test_torch_based_feature_store(in_memory):
),
]
feature_store = gb.TorchBasedFeatureStore(feature_data)
# Test read the entire feature.
assert torch.equal(
feature_store.read("node", "paper", "a"),
torch.tensor([[1, 2, 4], [2, 5, 3]]),
......@@ -111,6 +134,12 @@ def test_torch_based_feature_store(in_memory):
torch.tensor([[[1, 2], [3, 4]], [[2, 5], [3, 4]]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", "paper", "a") == torch.Size([3])
assert feature_store.size(
"edge", "paper:cites:paper", "b"
) == torch.Size([2, 2])
# For Windows, the file is locked by numpy.load. We need to delete
# it before closing the temporary directory.
a = b = None
......@@ -138,8 +167,12 @@ def test_torch_based_feature_store(in_memory):
),
]
feature_store = gb.TorchBasedFeatureStore(feature_data)
# Test read the entire feature.
assert torch.equal(
feature_store.read("node", None, "a"),
torch.tensor([[1, 2, 4], [2, 5, 3]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", None, "a") == torch.Size([3])
feature_store = None