Unverified Commit 9aca3092 authored by yxy235's avatar yxy235 Committed by GitHub
Browse files

[GraphBolt] Rename and move utils files. (#6627)


Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent 829c073f
......@@ -18,13 +18,12 @@ from .minibatch_transformer import *
from .negative_sampler import *
from .sampled_subgraph import *
from .subgraph_sampler import *
from .utils import (
add_reverse_edges,
from .internal import (
compact_csc_format,
exclude_seed_edges,
unique_and_compact,
unique_and_compact_node_pairs,
)
from .utils import add_reverse_edges, exclude_seed_edges
def load_graphbolt():
......
......@@ -6,9 +6,9 @@ import torchdata.datapipes as dp
from .base import CopyTo
from .feature_fetcher import FeatureFetcher
from .item_sampler import ItemSampler
from .utils import datapipe_graph_to_adjlist
from .internal import datapipe_graph_to_adjlist
from .item_sampler import ItemSampler
__all__ = [
......
......@@ -2,8 +2,9 @@
from torch.utils.data import functional_datapipe
from ..internal import unique_and_compact_node_pairs
from ..subgraph_sampler import SubgraphSampler
from ..utils import unique_and_compact_node_pairs
from .sampled_subgraph_impl import FusedSampledSubgraphImpl
......
......@@ -3,8 +3,9 @@
import torch
from torch.utils.data import functional_datapipe
from ..internal import compact_csc_format, unique_and_compact_node_pairs
from ..subgraph_sampler import SubgraphSampler
from ..utils import compact_csc_format, unique_and_compact_node_pairs
from .sampled_subgraph_impl import FusedSampledSubgraphImpl, SampledSubgraphImpl
......
......@@ -14,9 +14,9 @@ from ...base import dgl_warning
from ...data.utils import download, extract_archive
from ..base import etype_str_to_tuple
from ..dataset import Dataset, Task
from ..internal import copy_or_convert_data, read_data
from ..itemset import ItemSet, ItemSetDict
from ..sampling_graph import SamplingGraph
from ..utils import copy_or_convert_data, read_data
from .fused_csc_sampling_graph import (
from_dglgraph,
FusedCSCSamplingGraph,
......
"""Utility functions for GraphBolt."""
from .internal import *
from .utils import *
from .sample_utils import *
from .datapipe_utils import *
from .item_sampler_utils import *
......@@ -7,101 +7,6 @@ from typing import Dict, List, Tuple, Union
import torch
from ..base import CSCFormatBase, etype_str_to_tuple
from ..minibatch import MiniBatch
def add_reverse_edges(
    edges: Union[
        Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor],
    ],
    reverse_etypes_mapping: Dict[str, str] = None,
):
    r"""
    This function finds the reverse edges of the given `edges` and returns the
    composition of them. In a homogeneous graph, reverse edges have inverted
    source and destination node IDs. While in a heterogeneous graph, reversing
    also involves swapping node IDs and their types. This function could be
    used before `exclude_edges` function to help find targeting edges.
    Note: The found reverse edges may not really exist in the original graph.
    And repeated edges could be added because reverse edges may already exist
    in the `edges`.
    Parameters
    ----------
    edges : Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
                Tuple[torch.Tensor, torch.Tensor]]
        - If sampled subgraph is homogeneous, then `edges` should be a pair
          of tensors.
        - If sampled subgraph is heterogeneous, then `edges` should be a
          dictionary of edge types and the corresponding edges to exclude.
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from the original edge types to their reverse edge types.
        Ignored for homogeneous input; treated as an empty mapping when not
        provided.
    Returns
    -------
    Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor]]
        The node pairs contain both the original edges and their reverse
        counterparts.
    Examples
    --------
    >>> edges = {"A:r:B": (torch.tensor([0, 1]), torch.tensor([1, 2]))}
    >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}))
    {'A:r:B': (tensor([0, 1]), tensor([1, 2])),
    'B:rr:A': (tensor([1, 2]), tensor([0, 1]))}
    >>> edges = (torch.tensor([0, 1]), torch.tensor([2, 1]))
    >>> print(gb.add_reverse_edges(edges))
    (tensor([0, 1, 2, 1]), tensor([2, 1, 0, 1]))
    """
    if isinstance(edges, tuple):
        # Homogeneous case: reverse by swapping source/destination tensors.
        u, v = edges
        return (torch.cat([u, v]), torch.cat([v, u]))
    else:
        # Heterogeneous case: operate on a shallow copy so the caller's
        # dictionary is never mutated.
        combined_edges = edges.copy()
        # Robustness fix: the parameter is documented as optional, so a
        # missing mapping means there is nothing to reverse (previously this
        # raised AttributeError on None.items()).
        if reverse_etypes_mapping is None:
            reverse_etypes_mapping = {}
        for etype, reverse_etype in reverse_etypes_mapping.items():
            if etype in edges:
                if reverse_etype in combined_edges:
                    # Reverse type already present: append the swapped edges.
                    u, v = combined_edges[reverse_etype]
                    u = torch.cat([u, edges[etype][1]])
                    v = torch.cat([v, edges[etype][0]])
                    combined_edges[reverse_etype] = (u, v)
                else:
                    combined_edges[reverse_etype] = (
                        edges[etype][1],
                        edges[etype][0],
                    )
        return combined_edges
def exclude_seed_edges(
    minibatch: "MiniBatch",
    include_reverse_edges: bool = False,
    reverse_etypes_mapping: Dict[str, str] = None,
):
    """
    Exclude seed edges with or without their reverse edges from the sampled
    subgraphs in the minibatch.
    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch.
    include_reverse_edges : bool
        Whether the reverse edges of the seed edges should be excluded as
        well. Default is False.
    reverse_etypes_mapping : Dict[str, str] = None
        The mapping from the original edge types to their reverse edge types.
        Only consulted when `include_reverse_edges` is True.
    Returns
    -------
    MiniBatch
        The same minibatch, with `sampled_subgraphs` replaced by subgraphs
        from which the excluded edges have been removed.
    """
    edges_to_exclude = minibatch.node_pairs
    if include_reverse_edges:
        # Extend the exclusion set with the reverse counterparts of the
        # seed edges.
        edges_to_exclude = add_reverse_edges(
            minibatch.node_pairs, reverse_etypes_mapping
        )
    minibatch.sampled_subgraphs = [
        subgraph.exclude_edges(edges_to_exclude)
        for subgraph in minibatch.sampled_subgraphs
    ]
    return minibatch
def unique_and_compact(
......
......@@ -14,9 +14,9 @@ from ..base import dgl_warning
from ..batch import batch as dgl_batch
from ..heterograph import DGLGraph
from .internal import calculate_range
from .itemset import ItemSet, ItemSetDict
from .minibatch import MiniBatch
from .utils import calculate_range
__all__ = ["ItemSampler", "DistributedItemSampler", "minibatcher_default"]
......
......@@ -6,8 +6,8 @@ from typing import Dict
from torch.utils.data import functional_datapipe
from .base import etype_str_to_tuple
from .internal import unique_and_compact
from .minibatch_transformer import MiniBatchTransformer
from .utils import unique_and_compact
__all__ = [
"SubgraphSampler",
......
"""Utility functions for external use."""
from typing import Dict, Tuple, Union
import torch
from .minibatch import MiniBatch
def add_reverse_edges(
    edges: Union[
        Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor],
    ],
    reverse_etypes_mapping: Dict[str, str] = None,
):
    r"""Compose the given edges with their reverse edges.

    For a homogeneous graph (``edges`` given as a pair of tensors), the
    reverse of an edge swaps its source and destination IDs. For a
    heterogeneous graph (``edges`` given as a dict keyed by edge type),
    reversing additionally maps each edge type through
    ``reverse_etypes_mapping``. Intended as a helper before
    ``exclude_edges``.

    Note that the produced reverse edges are not guaranteed to exist in the
    original graph, and duplicates may appear when reverse edges are already
    contained in ``edges``.

    Parameters
    ----------
    edges : Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
                Tuple[torch.Tensor, torch.Tensor]]
        A pair of tensors (homogeneous) or a dictionary mapping edge types
        to the corresponding edges to exclude (heterogeneous).
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from original edge types to their reverse edge types.

    Returns
    -------
    Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor]]
        Node pairs containing both the original edges and their reverse
        counterparts.

    Examples
    --------
    >>> edges = {"A:r:B": (torch.tensor([0, 1]), torch.tensor([1, 2]))}
    >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}))
    {'A:r:B': (tensor([0, 1]), tensor([1, 2])),
    'B:rr:A': (tensor([1, 2]), tensor([0, 1]))}
    >>> edges = (torch.tensor([0, 1]), torch.tensor([2, 1]))
    >>> print(gb.add_reverse_edges(edges))
    (tensor([0, 1, 2, 1]), tensor([2, 1, 0, 1]))
    """
    if isinstance(edges, tuple):
        # Homogeneous: concatenate the swapped pair onto the original.
        src, dst = edges
        return (torch.cat((src, dst)), torch.cat((dst, src)))
    # Heterogeneous: build the result on a shallow copy of the input dict.
    combined = dict(edges)
    for etype, rev_etype in reverse_etypes_mapping.items():
        if etype not in edges:
            continue
        src, dst = edges[etype]
        if rev_etype in combined:
            # Reverse type already present: append the swapped edges to it.
            prev_src, prev_dst = combined[rev_etype]
            combined[rev_etype] = (
                torch.cat((prev_src, dst)),
                torch.cat((prev_dst, src)),
            )
        else:
            combined[rev_etype] = (dst, src)
    return combined
def exclude_seed_edges(
    minibatch: MiniBatch,
    include_reverse_edges: bool = False,
    reverse_etypes_mapping: Dict[str, str] = None,
):
    """
    Exclude seed edges with or without their reverse edges from the sampled
    subgraphs in the minibatch.
    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch.
    include_reverse_edges : bool
        Whether the reverse edges of the seed edges should be excluded as
        well. Default is False.
    reverse_etypes_mapping : Dict[str, str] = None
        The mapping from the original edge types to their reverse edge types.
        Only used when `include_reverse_edges` is True.
    Returns
    -------
    MiniBatch
        The same minibatch, with `sampled_subgraphs` replaced by subgraphs
        from which the excluded edges have been removed.
    """
    edges_to_exclude = minibatch.node_pairs
    if include_reverse_edges:
        # Extend the exclusion set with the reverse counterparts of the
        # seed edges.
        edges_to_exclude = add_reverse_edges(
            minibatch.node_pairs, reverse_etypes_mapping
        )
    minibatch.sampled_subgraphs = [
        subgraph.exclude_edges(edges_to_exclude)
        for subgraph in minibatch.sampled_subgraphs
    ]
    return minibatch
......@@ -865,7 +865,7 @@ def test_RangeCalculation(params):
sum = 0
for rank in range(num_replicas):
for worker_id in range(max(num_workers, 1)):
result = gb.utils.calculate_range(
result = gb.internal.calculate_range(
True,
total,
num_replicas,
......
import os
import tempfile
import dgl.graphbolt.utils as utils
import dgl.graphbolt.internal as internal
import numpy as np
import pytest
import torch
......@@ -12,7 +12,7 @@ def test_read_torch_data():
save_tensor = torch.tensor([[1, 2, 4], [2, 5, 3]])
file_name = os.path.join(test_dir, "save_tensor.pt")
torch.save(save_tensor, file_name)
read_tensor = utils.internal._read_torch_data(file_name)
read_tensor = internal.utils._read_torch_data(file_name)
assert torch.equal(save_tensor, read_tensor)
save_tensor = read_tensor = None
......@@ -23,7 +23,7 @@ def test_read_numpy_data(in_memory):
save_numpy = np.array([[1, 2, 4], [2, 5, 3]])
file_name = os.path.join(test_dir, "save_numpy.npy")
np.save(file_name, save_numpy)
read_tensor = utils.internal._read_numpy_data(file_name, in_memory)
read_tensor = internal.utils._read_numpy_data(file_name, in_memory)
assert torch.equal(torch.from_numpy(save_numpy), read_tensor)
save_numpy = read_tensor = None
......@@ -38,7 +38,7 @@ def test_read_data(fmt):
np.save(file_name, data)
elif fmt == "torch":
torch.save(torch.from_numpy(data), file_name)
read_tensor = utils.read_data(file_name, fmt)
read_tensor = internal.read_data(file_name, fmt)
assert torch.equal(torch.from_numpy(data), read_tensor)
......@@ -65,9 +65,9 @@ def test_save_data(data_fmt, save_fmt, contiguous):
save_file_name = os.path.join(test_dir, f"save_data.{type_name}")
# Step1. Save the data.
if data_fmt == "torch":
utils.save_data(tensor_data, save_file_name, save_fmt)
internal.save_data(tensor_data, save_file_name, save_fmt)
elif data_fmt == "numpy":
utils.save_data(data, save_file_name, save_fmt)
internal.save_data(data, save_file_name, save_fmt)
# Step2. Load the data.
if save_fmt == "torch":
......@@ -91,11 +91,11 @@ def test_get_npy_dim(fmt):
file_name = os.path.join(test_dir, f"save_data.{type_name}")
if fmt == "numpy":
np.save(file_name, data)
assert utils.get_npy_dim(file_name) == 2
assert internal.get_npy_dim(file_name) == 2
elif fmt == "torch":
torch.save(torch.from_numpy(data), file_name)
with pytest.raises(ValueError):
utils.get_npy_dim(file_name)
internal.get_npy_dim(file_name)
data = None
......@@ -116,7 +116,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature):
torch.save(tensor_data, input_path)
if save_fmt == "torch":
with pytest.raises(AssertionError):
utils.copy_or_convert_data(
internal.copy_or_convert_data(
input_path,
output_path,
data_fmt,
......@@ -124,7 +124,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature):
is_feature=is_feature,
)
else:
utils.copy_or_convert_data(
internal.copy_or_convert_data(
input_path,
output_path,
data_fmt,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment