Unverified Commit 9aca3092 authored by yxy235's avatar yxy235 Committed by GitHub
Browse files

[GraphBolt] Rename and move utils files. (#6627)


Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent 829c073f
...@@ -18,13 +18,12 @@ from .minibatch_transformer import * ...@@ -18,13 +18,12 @@ from .minibatch_transformer import *
from .negative_sampler import * from .negative_sampler import *
from .sampled_subgraph import * from .sampled_subgraph import *
from .subgraph_sampler import * from .subgraph_sampler import *
from .utils import ( from .internal import (
add_reverse_edges,
compact_csc_format, compact_csc_format,
exclude_seed_edges,
unique_and_compact, unique_and_compact,
unique_and_compact_node_pairs, unique_and_compact_node_pairs,
) )
from .utils import add_reverse_edges, exclude_seed_edges
def load_graphbolt(): def load_graphbolt():
......
...@@ -6,9 +6,9 @@ import torchdata.datapipes as dp ...@@ -6,9 +6,9 @@ import torchdata.datapipes as dp
from .base import CopyTo from .base import CopyTo
from .feature_fetcher import FeatureFetcher from .feature_fetcher import FeatureFetcher
from .item_sampler import ItemSampler
from .utils import datapipe_graph_to_adjlist from .internal import datapipe_graph_to_adjlist
from .item_sampler import ItemSampler
__all__ = [ __all__ = [
......
...@@ -2,8 +2,9 @@ ...@@ -2,8 +2,9 @@
from torch.utils.data import functional_datapipe from torch.utils.data import functional_datapipe
from ..internal import unique_and_compact_node_pairs
from ..subgraph_sampler import SubgraphSampler from ..subgraph_sampler import SubgraphSampler
from ..utils import unique_and_compact_node_pairs
from .sampled_subgraph_impl import FusedSampledSubgraphImpl from .sampled_subgraph_impl import FusedSampledSubgraphImpl
......
...@@ -3,8 +3,9 @@ ...@@ -3,8 +3,9 @@
import torch import torch
from torch.utils.data import functional_datapipe from torch.utils.data import functional_datapipe
from ..internal import compact_csc_format, unique_and_compact_node_pairs
from ..subgraph_sampler import SubgraphSampler from ..subgraph_sampler import SubgraphSampler
from ..utils import compact_csc_format, unique_and_compact_node_pairs
from .sampled_subgraph_impl import FusedSampledSubgraphImpl, SampledSubgraphImpl from .sampled_subgraph_impl import FusedSampledSubgraphImpl, SampledSubgraphImpl
......
...@@ -14,9 +14,9 @@ from ...base import dgl_warning ...@@ -14,9 +14,9 @@ from ...base import dgl_warning
from ...data.utils import download, extract_archive from ...data.utils import download, extract_archive
from ..base import etype_str_to_tuple from ..base import etype_str_to_tuple
from ..dataset import Dataset, Task from ..dataset import Dataset, Task
from ..internal import copy_or_convert_data, read_data
from ..itemset import ItemSet, ItemSetDict from ..itemset import ItemSet, ItemSetDict
from ..sampling_graph import SamplingGraph from ..sampling_graph import SamplingGraph
from ..utils import copy_or_convert_data, read_data
from .fused_csc_sampling_graph import ( from .fused_csc_sampling_graph import (
from_dglgraph, from_dglgraph,
FusedCSCSamplingGraph, FusedCSCSamplingGraph,
......
"""Utility functions for GraphBolt.""" """Utility functions for GraphBolt."""
from .internal import * from .utils import *
from .sample_utils import * from .sample_utils import *
from .datapipe_utils import * from .datapipe_utils import *
from .item_sampler_utils import * from .item_sampler_utils import *
...@@ -7,101 +7,6 @@ from typing import Dict, List, Tuple, Union ...@@ -7,101 +7,6 @@ from typing import Dict, List, Tuple, Union
import torch import torch
from ..base import CSCFormatBase, etype_str_to_tuple from ..base import CSCFormatBase, etype_str_to_tuple
from ..minibatch import MiniBatch
def add_reverse_edges(
    edges: Union[
        Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor],
    ],
    reverse_etypes_mapping: Dict[str, str] = None,
):
    r"""
    This function finds the reverse edges of the given `edges` and returns the
    composition of them. In a homogeneous graph, reverse edges have inverted
    source and destination node IDs. While in a heterogeneous graph, reversing
    also involves swapping node IDs and their types. This function could be
    used before `exclude_edges` function to help find targeting edges.
    Note: The found reverse edges may not really exist in the original graph.
    And repeated edges could be added because reverse edges may already exist
    in the `edges`.
    Parameters
    ----------
    edges : Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
                Tuple[torch.Tensor, torch.Tensor]]
        - If sampled subgraph is homogeneous, then `edges` should be a pair
          of tensors.
        - If sampled subgraph is heterogeneous, then `edges` should be a
          dictionary of edge types and the corresponding edges to exclude.
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from the original edge types to their reverse edge types.
        Required when `edges` is a dictionary (heterogeneous case).
    Returns
    -------
    Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor]]
        The node pairs contain both the original edges and their reverse
        counterparts.
    Examples
    --------
    >>> edges = {"A:r:B": (torch.tensor([0, 1]), torch.tensor([1, 2]))}
    >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}))
    {'A:r:B': (tensor([0, 1]), tensor([1, 2])),
    'B:rr:A': (tensor([1, 2]), tensor([0, 1]))}
    >>> edges = (torch.tensor([0, 1]), torch.tensor([2, 1]))
    >>> print(gb.add_reverse_edges(edges))
    (tensor([0, 1, 2, 1]), tensor([2, 1, 0, 1]))
    """
    if isinstance(edges, tuple):
        # Homogeneous case: append the flipped (v, u) pairs after (u, v).
        u, v = edges
        return (torch.cat([u, v]), torch.cat([v, u]))
    else:
        # Heterogeneous case: for each mapped etype, add swapped node pairs
        # under the reverse edge type, merging with any existing entries.
        combined_edges = edges.copy()
        for etype, reverse_etype in reverse_etypes_mapping.items():
            if etype in edges:
                if reverse_etype in combined_edges:
                    u, v = combined_edges[reverse_etype]
                    u = torch.cat([u, edges[etype][1]])
                    v = torch.cat([v, edges[etype][0]])
                    combined_edges[reverse_etype] = (u, v)
                else:
                    combined_edges[reverse_etype] = (
                        edges[etype][1],
                        edges[etype][0],
                    )
        return combined_edges
def exclude_seed_edges(
    minibatch: MiniBatch,
    include_reverse_edges: bool = False,
    reverse_etypes_mapping: Dict[str, str] = None,
):
    """
    Exclude seed edges with or without their reverse edges from the sampled
    subgraphs in the minibatch.
    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch.
    include_reverse_edges : bool, optional
        Whether to also exclude the reverse edges of the seed edges. Default
        is False.
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from the original edge types to their reverse edge types.
        Only used when `include_reverse_edges` is True.
    Returns
    -------
    MiniBatch
        The minibatch with seed edges excluded from each sampled subgraph.
    """
    edges_to_exclude = minibatch.node_pairs
    if include_reverse_edges:
        # Augment the seed edges with their reverse counterparts so both
        # directions are removed from the sampled subgraphs.
        edges_to_exclude = add_reverse_edges(
            minibatch.node_pairs, reverse_etypes_mapping
        )
    minibatch.sampled_subgraphs = [
        subgraph.exclude_edges(edges_to_exclude)
        for subgraph in minibatch.sampled_subgraphs
    ]
    return minibatch
def unique_and_compact( def unique_and_compact(
......
...@@ -14,9 +14,9 @@ from ..base import dgl_warning ...@@ -14,9 +14,9 @@ from ..base import dgl_warning
from ..batch import batch as dgl_batch from ..batch import batch as dgl_batch
from ..heterograph import DGLGraph from ..heterograph import DGLGraph
from .internal import calculate_range
from .itemset import ItemSet, ItemSetDict from .itemset import ItemSet, ItemSetDict
from .minibatch import MiniBatch from .minibatch import MiniBatch
from .utils import calculate_range
__all__ = ["ItemSampler", "DistributedItemSampler", "minibatcher_default"] __all__ = ["ItemSampler", "DistributedItemSampler", "minibatcher_default"]
......
...@@ -6,8 +6,8 @@ from typing import Dict ...@@ -6,8 +6,8 @@ from typing import Dict
from torch.utils.data import functional_datapipe from torch.utils.data import functional_datapipe
from .base import etype_str_to_tuple from .base import etype_str_to_tuple
from .internal import unique_and_compact
from .minibatch_transformer import MiniBatchTransformer from .minibatch_transformer import MiniBatchTransformer
from .utils import unique_and_compact
__all__ = [ __all__ = [
"SubgraphSampler", "SubgraphSampler",
......
"""Utility functions for external use."""
from typing import Dict, Tuple, Union
import torch
from .minibatch import MiniBatch
def add_reverse_edges(
    edges: Union[
        Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor],
    ],
    reverse_etypes_mapping: Dict[str, str] = None,
):
    r"""
    This function finds the reverse edges of the given `edges` and returns the
    composition of them. In a homogeneous graph, reverse edges have inverted
    source and destination node IDs. While in a heterogeneous graph, reversing
    also involves swapping node IDs and their types. This function could be
    used before `exclude_edges` function to help find targeting edges.
    Note: The found reverse edges may not really exist in the original graph.
    And repeated edges could be added because reverse edges may already exist
    in the `edges`.
    Parameters
    ----------
    edges : Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
                Tuple[torch.Tensor, torch.Tensor]]
        - If sampled subgraph is homogeneous, then `edges` should be a pair
          of tensors.
        - If sampled subgraph is heterogeneous, then `edges` should be a
          dictionary of edge types and the corresponding edges to exclude.
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from the original edge types to their reverse edge types.
        Required when `edges` is a dictionary (heterogeneous case).
    Returns
    -------
    Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor]]
        The node pairs contain both the original edges and their reverse
        counterparts.
    Examples
    --------
    >>> edges = {"A:r:B": (torch.tensor([0, 1]), torch.tensor([1, 2]))}
    >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}))
    {'A:r:B': (tensor([0, 1]), tensor([1, 2])),
    'B:rr:A': (tensor([1, 2]), tensor([0, 1]))}
    >>> edges = (torch.tensor([0, 1]), torch.tensor([2, 1]))
    >>> print(gb.add_reverse_edges(edges))
    (tensor([0, 1, 2, 1]), tensor([2, 1, 0, 1]))
    """
    if isinstance(edges, tuple):
        # Homogeneous case: append the flipped (v, u) pairs after (u, v).
        u, v = edges
        return (torch.cat([u, v]), torch.cat([v, u]))
    else:
        # Heterogeneous case: for each mapped etype, add swapped node pairs
        # under the reverse edge type, merging with any existing entries.
        combined_edges = edges.copy()
        for etype, reverse_etype in reverse_etypes_mapping.items():
            if etype in edges:
                if reverse_etype in combined_edges:
                    u, v = combined_edges[reverse_etype]
                    u = torch.cat([u, edges[etype][1]])
                    v = torch.cat([v, edges[etype][0]])
                    combined_edges[reverse_etype] = (u, v)
                else:
                    combined_edges[reverse_etype] = (
                        edges[etype][1],
                        edges[etype][0],
                    )
        return combined_edges
def exclude_seed_edges(
    minibatch: MiniBatch,
    include_reverse_edges: bool = False,
    reverse_etypes_mapping: Dict[str, str] = None,
):
    """
    Exclude seed edges with or without their reverse edges from the sampled
    subgraphs in the minibatch.
    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch.
    include_reverse_edges : bool, optional
        Whether to also exclude the reverse edges of the seed edges. Default
        is False.
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from the original edge types to their reverse edge types.
        Only used when `include_reverse_edges` is True.
    Returns
    -------
    MiniBatch
        The minibatch with seed edges excluded from each sampled subgraph.
    """
    edges_to_exclude = minibatch.node_pairs
    if include_reverse_edges:
        # Augment the seed edges with their reverse counterparts so both
        # directions are removed from the sampled subgraphs.
        edges_to_exclude = add_reverse_edges(
            minibatch.node_pairs, reverse_etypes_mapping
        )
    minibatch.sampled_subgraphs = [
        subgraph.exclude_edges(edges_to_exclude)
        for subgraph in minibatch.sampled_subgraphs
    ]
    return minibatch
...@@ -865,7 +865,7 @@ def test_RangeCalculation(params): ...@@ -865,7 +865,7 @@ def test_RangeCalculation(params):
sum = 0 sum = 0
for rank in range(num_replicas): for rank in range(num_replicas):
for worker_id in range(max(num_workers, 1)): for worker_id in range(max(num_workers, 1)):
result = gb.utils.calculate_range( result = gb.internal.calculate_range(
True, True,
total, total,
num_replicas, num_replicas,
......
import os import os
import tempfile import tempfile
import dgl.graphbolt.utils as utils import dgl.graphbolt.internal as internal
import numpy as np import numpy as np
import pytest import pytest
import torch import torch
...@@ -12,7 +12,7 @@ def test_read_torch_data(): ...@@ -12,7 +12,7 @@ def test_read_torch_data():
save_tensor = torch.tensor([[1, 2, 4], [2, 5, 3]]) save_tensor = torch.tensor([[1, 2, 4], [2, 5, 3]])
file_name = os.path.join(test_dir, "save_tensor.pt") file_name = os.path.join(test_dir, "save_tensor.pt")
torch.save(save_tensor, file_name) torch.save(save_tensor, file_name)
read_tensor = utils.internal._read_torch_data(file_name) read_tensor = internal.utils._read_torch_data(file_name)
assert torch.equal(save_tensor, read_tensor) assert torch.equal(save_tensor, read_tensor)
save_tensor = read_tensor = None save_tensor = read_tensor = None
...@@ -23,7 +23,7 @@ def test_read_numpy_data(in_memory): ...@@ -23,7 +23,7 @@ def test_read_numpy_data(in_memory):
save_numpy = np.array([[1, 2, 4], [2, 5, 3]]) save_numpy = np.array([[1, 2, 4], [2, 5, 3]])
file_name = os.path.join(test_dir, "save_numpy.npy") file_name = os.path.join(test_dir, "save_numpy.npy")
np.save(file_name, save_numpy) np.save(file_name, save_numpy)
read_tensor = utils.internal._read_numpy_data(file_name, in_memory) read_tensor = internal.utils._read_numpy_data(file_name, in_memory)
assert torch.equal(torch.from_numpy(save_numpy), read_tensor) assert torch.equal(torch.from_numpy(save_numpy), read_tensor)
save_numpy = read_tensor = None save_numpy = read_tensor = None
...@@ -38,7 +38,7 @@ def test_read_data(fmt): ...@@ -38,7 +38,7 @@ def test_read_data(fmt):
np.save(file_name, data) np.save(file_name, data)
elif fmt == "torch": elif fmt == "torch":
torch.save(torch.from_numpy(data), file_name) torch.save(torch.from_numpy(data), file_name)
read_tensor = utils.read_data(file_name, fmt) read_tensor = internal.read_data(file_name, fmt)
assert torch.equal(torch.from_numpy(data), read_tensor) assert torch.equal(torch.from_numpy(data), read_tensor)
...@@ -65,9 +65,9 @@ def test_save_data(data_fmt, save_fmt, contiguous): ...@@ -65,9 +65,9 @@ def test_save_data(data_fmt, save_fmt, contiguous):
save_file_name = os.path.join(test_dir, f"save_data.{type_name}") save_file_name = os.path.join(test_dir, f"save_data.{type_name}")
# Step1. Save the data. # Step1. Save the data.
if data_fmt == "torch": if data_fmt == "torch":
utils.save_data(tensor_data, save_file_name, save_fmt) internal.save_data(tensor_data, save_file_name, save_fmt)
elif data_fmt == "numpy": elif data_fmt == "numpy":
utils.save_data(data, save_file_name, save_fmt) internal.save_data(data, save_file_name, save_fmt)
# Step2. Load the data. # Step2. Load the data.
if save_fmt == "torch": if save_fmt == "torch":
...@@ -91,11 +91,11 @@ def test_get_npy_dim(fmt): ...@@ -91,11 +91,11 @@ def test_get_npy_dim(fmt):
file_name = os.path.join(test_dir, f"save_data.{type_name}") file_name = os.path.join(test_dir, f"save_data.{type_name}")
if fmt == "numpy": if fmt == "numpy":
np.save(file_name, data) np.save(file_name, data)
assert utils.get_npy_dim(file_name) == 2 assert internal.get_npy_dim(file_name) == 2
elif fmt == "torch": elif fmt == "torch":
torch.save(torch.from_numpy(data), file_name) torch.save(torch.from_numpy(data), file_name)
with pytest.raises(ValueError): with pytest.raises(ValueError):
utils.get_npy_dim(file_name) internal.get_npy_dim(file_name)
data = None data = None
...@@ -116,7 +116,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature): ...@@ -116,7 +116,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature):
torch.save(tensor_data, input_path) torch.save(tensor_data, input_path)
if save_fmt == "torch": if save_fmt == "torch":
with pytest.raises(AssertionError): with pytest.raises(AssertionError):
utils.copy_or_convert_data( internal.copy_or_convert_data(
input_path, input_path,
output_path, output_path,
data_fmt, data_fmt,
...@@ -124,7 +124,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature): ...@@ -124,7 +124,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature):
is_feature=is_feature, is_feature=is_feature,
) )
else: else:
utils.copy_or_convert_data( internal.copy_or_convert_data(
input_path, input_path,
output_path, output_path,
data_fmt, data_fmt,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment