Unverified Commit 5ba2620f authored by yxy235, committed by GitHub

[Graphbolt] Support get feature size. (#6374)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent f971e25a
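The commit adds a size() accessor to the Feature and FeatureStore interfaces and to their Torch-based and GPU-cached implementations. A minimal sketch of the resulting call, based on the docstring examples in the hunks below and assuming graphbolt is imported as dgl.graphbolt:

import torch
from dgl import graphbolt as gb

# Three items, each a length-5 vector. size() reports the per-item shape,
# i.e. the tensor shape with the leading item dimension dropped.
feature = gb.TorchBasedFeature(torch.arange(15).reshape(3, 5))
print(feature.size())  # expected: torch.Size([5])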
......@@ -26,6 +26,16 @@ class Feature:
"""
raise NotImplementedError
def size(self):
"""Get the size of the feature.
Returns
-------
torch.Size
The size of the feature.
"""
raise NotImplementedError
def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature.
......@@ -77,6 +87,29 @@ class FeatureStore:
"""
raise NotImplementedError
def size(
self,
domain: str,
type_name: str,
feature_name: str,
):
"""Get the size of the specified feature in the feature store.
Parameters
----------
domain : str
The domain of the feature such as "node", "edge" or "graph".
type_name : str
The node or edge type name.
feature_name : str
The feature name.
Returns
-------
torch.Size
The size of the specified feature in the feature store.
"""
raise NotImplementedError
def update(
self,
domain: str,
......
......@@ -57,6 +57,28 @@ class BasicFeatureStore(FeatureStore):
"""
return self._features[(domain, type_name, feature_name)].read(ids)
def size(
self,
domain: str,
type_name: str,
feature_name: str,
):
"""Get the size of the specified feature in the feature store.
Parameters
----------
domain : str
The domain of the feature such as "node", "edge" or "graph".
type_name : str
The node or edge type name.
feature_name : str
The feature name.
Returns
-------
torch.Size
The size of the specified feature in the feature store.
"""
return self._features[(domain, type_name, feature_name)].size()
def update(
self,
domain: str,
......
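With BasicFeatureStore delegating to the wrapped Feature, callers query a feature's size with the same (domain, type_name, feature_name) triple used by read() and update(). A small sketch mirroring the tests further down, with hypothetical data:

import torch
from dgl import graphbolt as gb

features = {
    ("node", "author", "a"): gb.TorchBasedFeature(
        torch.tensor([[1, 2, 4], [2, 5, 3]])
    ),
}
feature_store = gb.BasicFeatureStore(features)

# size() forwards to the underlying Feature keyed by the same triple.
print(feature_store.size("node", "author", "a"))  # expected: torch.Size([3])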
......@@ -40,6 +40,8 @@ class GPUCachedFeature(Feature):
>>> feature.read(torch.tensor([0, 1]).to("cuda"))
tensor([[0, 1, 2, 3, 4],
[1, 1, 1, 1, 1]], device='cuda:0')
>>> feature.size()
torch.Size([5])
"""
super(GPUCachedFeature, self).__init__()
assert isinstance(fallback_feature, Feature), (
......@@ -83,6 +85,16 @@ class GPUCachedFeature(Feature):
self._feature.replace(missing_keys, missing_values)
return torch.reshape(values, self.item_shape)
def size(self):
"""Get the size of the feature.
Returns
-------
torch.Size
The size of the feature.
"""
return self._fallback_feature.size()
def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature.
......
......@@ -41,6 +41,8 @@ class TorchBasedFeature(Feature):
>>> feature.read(torch.tensor([0, 1]))
tensor([[0, 1, 2, 3, 4],
[1, 1, 1, 1, 1]])
>>> feature.size()
torch.Size([5])
>>> import numpy as np
>>> arr = np.array([[1, 2], [3, 4]])
......@@ -85,6 +87,15 @@ class TorchBasedFeature(Feature):
return self._tensor
return self._tensor[ids]
def size(self):
"""Get the size of the feature.
Returns
-------
torch.Size
The size of the feature.
"""
return self._tensor.size()[1:]
def update(self, value: torch.Tensor, ids: torch.Tensor = None):
"""Update the feature store.
......@@ -100,18 +111,21 @@ class TorchBasedFeature(Feature):
updated.
"""
if ids is None:
- assert self._tensor.shape == value.shape, (
+ assert self.size() == value.size()[1:], (
f"ids is None, so the entire feature will be updated. "
- f"But the shape of the feature is {self._tensor.shape}, "
- f"while the shape of the value is {value.shape}."
+ f"But the size of the feature is {self.size()}, "
+ f"while the size of the value is {value.size()[1:]}."
)
- self._tensor[:] = value
+ self._tensor = value
else:
assert ids.shape[0] == value.shape[0], (
f"ids and value must have the same length, "
f"but got {ids.shape[0]} and {value.shape[0]}."
)
- # [Todo] Check the value feature size matches tesnsor's one.
+ assert self.size() == value.size()[1:], (
+ f"The size of the feature is {self.size()}, "
+ f"while the size of the value is {value.size()[1:]}."
+ )
self._tensor[ids] = value
......
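Two details of the TorchBasedFeature change are worth noting: size() returns self._tensor.size()[1:], i.e. the shape of a single item with the leading ID dimension stripped, and update() now checks that the incoming value matches that per-item size rather than the full tensor shape. A short illustration under the same assumptions as above:

import torch
from dgl import graphbolt as gb

feature = gb.TorchBasedFeature(torch.zeros(4, 2, 3))
print(feature.size())  # torch.Size([2, 3]): per-item shape, not the item count

# A full update may change the number of items, but not the per-item size.
feature.update(torch.ones(7, 2, 3))   # passes: per-item size is still (2, 3)
# feature.update(torch.ones(7, 3))    # would trip the new size assertion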
......@@ -34,6 +34,10 @@ def test_basic_feature_store_homo():
torch.tensor([[[1, 2], [3, 4]]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", None, "a") == torch.Size([3])
assert feature_store.size("node", None, "b") == torch.Size([2, 2])
def test_basic_feature_store_hetero():
a = torch.tensor([[1, 2, 4], [2, 5, 3]])
......@@ -41,7 +45,7 @@ def test_basic_feature_store_hetero():
features = {}
features[("node", "author", "a")] = gb.TorchBasedFeature(a)
features[("edge", "paper:cites:paper", "b")] = gb.TorchBasedFeature(b)
features[("edge", "paper:cites", "b")] = gb.TorchBasedFeature(b)
feature_store = gb.BasicFeatureStore(features)
......@@ -51,7 +55,7 @@ def test_basic_feature_store_hetero():
torch.tensor([[1, 2, 4], [2, 5, 3]]),
)
assert torch.equal(
feature_store.read("edge", "paper:cites:paper", "b"),
feature_store.read("edge", "paper:cites", "b"),
torch.tensor([[[6], [8]], [[8], [9]]]),
)
......@@ -61,6 +65,10 @@ def test_basic_feature_store_hetero():
torch.tensor([[1, 2, 4]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", "author", "a") == torch.Size([3])
assert feature_store.size("edge", "paper:cites", "b") == torch.Size([2, 1])
def test_basic_feature_store_errors():
a = torch.tensor([3, 2, 1])
......
......@@ -34,6 +34,10 @@ def test_gpu_cached_feature():
),
)
# Test get the size of the entire feature.
assert feat_store_a.size() == torch.Size([3])
assert feat_store_b.size() == torch.Size([2, 2])
# Test update the entire feature.
feat_store_a.update(
torch.tensor([[0.0, 1.0, 2.0], [3.0, 5.0, 2.0]]).to("cuda")
......
......@@ -35,6 +35,8 @@ def test_torch_based_feature(in_memory):
assert torch.equal(
feature_a.read(), torch.tensor([[1, 2, 3], [4, 5, 6]])
)
# Test read the entire feature.
assert torch.equal(
feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[4, 5], [6, 7]]])
)
......@@ -57,6 +59,25 @@ def test_torch_based_feature(in_memory):
feature_b.read(), torch.tensor([[[1, 2], [3, 4]], [[1, 2], [3, 4]]])
)
# Test update the feature.
feature_a.update(torch.tensor([[5, 1, 3]]))
assert torch.equal(
feature_a.read(),
torch.tensor([[5, 1, 3]]),
)
feature_b.update(
torch.tensor([[[1, 3], [5, 7]], [[2, 4], [6, 8]], [[2, 4], [6, 8]]])
)
assert torch.equal(
feature_b.read(),
torch.tensor(
[[[1, 3], [5, 7]], [[2, 4], [6, 8]], [[2, 4], [6, 8]]]
),
)
# Test get the size of the entire feature.
assert feature_a.size() == torch.Size([3])
assert feature_b.size() == torch.Size([2, 2])
with pytest.raises(IndexError):
feature_a.read(torch.tensor([0, 1, 2, 3]))
......@@ -102,6 +123,8 @@ def test_torch_based_feature_store(in_memory):
),
]
feature_store = gb.TorchBasedFeatureStore(feature_data)
# Test read the entire feature.
assert torch.equal(
feature_store.read("node", "paper", "a"),
torch.tensor([[1, 2, 4], [2, 5, 3]]),
......@@ -111,6 +134,12 @@ def test_torch_based_feature_store(in_memory):
torch.tensor([[[1, 2], [3, 4]], [[2, 5], [3, 4]]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", "paper", "a") == torch.Size([3])
assert feature_store.size(
"edge", "paper:cites:paper", "b"
) == torch.Size([2, 2])
# For Windows, the file is locked by numpy.load. We need to delete
# it before closing the temporary directory.
a = b = None
......@@ -138,8 +167,12 @@ def test_torch_based_feature_store(in_memory):
),
]
feature_store = gb.TorchBasedFeatureStore(feature_data)
# Test read the entire feature.
assert torch.equal(
feature_store.read("node", None, "a"),
torch.tensor([[1, 2, 4], [2, 5, 3]]),
)
# Test get the size of the entire feature.
assert feature_store.size("node", None, "a") == torch.Size([3])
feature_store = None