Unverified Commit 9aca3092 authored by yxy235's avatar yxy235 Committed by GitHub
Browse files

[GraphBolt] Rename and move utils files. (#6627)


Co-authored-by: default avatarUbuntu <ubuntu@ip-172-31-0-133.us-west-2.compute.internal>
parent 829c073f
......@@ -18,13 +18,12 @@ from .minibatch_transformer import *
from .negative_sampler import *
from .sampled_subgraph import *
from .subgraph_sampler import *
from .utils import (
add_reverse_edges,
from .internal import (
compact_csc_format,
exclude_seed_edges,
unique_and_compact,
unique_and_compact_node_pairs,
)
from .utils import add_reverse_edges, exclude_seed_edges
def load_graphbolt():
......
......@@ -6,9 +6,9 @@ import torchdata.datapipes as dp
from .base import CopyTo
from .feature_fetcher import FeatureFetcher
from .item_sampler import ItemSampler
from .utils import datapipe_graph_to_adjlist
from .internal import datapipe_graph_to_adjlist
from .item_sampler import ItemSampler
__all__ = [
......
......@@ -2,8 +2,9 @@
from torch.utils.data import functional_datapipe
from ..internal import unique_and_compact_node_pairs
from ..subgraph_sampler import SubgraphSampler
from ..utils import unique_and_compact_node_pairs
from .sampled_subgraph_impl import FusedSampledSubgraphImpl
......
......@@ -3,8 +3,9 @@
import torch
from torch.utils.data import functional_datapipe
from ..internal import compact_csc_format, unique_and_compact_node_pairs
from ..subgraph_sampler import SubgraphSampler
from ..utils import compact_csc_format, unique_and_compact_node_pairs
from .sampled_subgraph_impl import FusedSampledSubgraphImpl, SampledSubgraphImpl
......
......@@ -14,9 +14,9 @@ from ...base import dgl_warning
from ...data.utils import download, extract_archive
from ..base import etype_str_to_tuple
from ..dataset import Dataset, Task
from ..internal import copy_or_convert_data, read_data
from ..itemset import ItemSet, ItemSetDict
from ..sampling_graph import SamplingGraph
from ..utils import copy_or_convert_data, read_data
from .fused_csc_sampling_graph import (
from_dglgraph,
FusedCSCSamplingGraph,
......
"""Utility functions for GraphBolt."""
from .internal import *
from .utils import *
from .sample_utils import *
from .datapipe_utils import *
from .item_sampler_utils import *
......@@ -7,101 +7,6 @@ from typing import Dict, List, Tuple, Union
import torch
from ..base import CSCFormatBase, etype_str_to_tuple
from ..minibatch import MiniBatch
def add_reverse_edges(
    edges: Union[
        Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor],
    ],
    reverse_etypes_mapping: Dict[str, str] = None,
):
    r"""
    This function finds the reverse edges of the given `edges` and returns the
    composition of them. In a homogeneous graph, reverse edges have inverted
    source and destination node IDs. While in a heterogeneous graph, reversing
    also involves swapping node IDs and their types. This function could be
    used before `exclude_edges` function to help find targeting edges.
    Note: The found reverse edges may not really exist in the original graph.
    And repeated edges could be added because reverse edges may already exist
    in the `edges`.
    Parameters
    ----------
    edges : Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
                Tuple[torch.Tensor, torch.Tensor]]
        - If sampled subgraph is homogeneous, then `edges` should be a pair
          of tensors.
        - If sampled subgraph is heterogeneous, then `edges` should be a
          dictionary of edge types and the corresponding edges to exclude.
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from the original edge types to their reverse edge types.
        Ignored for homogeneous input; treated as an empty mapping when not
        provided.
    Returns
    -------
    Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor]]
        The node pairs contain both the original edges and their reverse
        counterparts.
    Examples
    --------
    >>> edges = {"A:r:B": (torch.tensor([0, 1]), torch.tensor([1, 2]))}
    >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}))
    {'A:r:B': (tensor([0, 1]), tensor([1, 2])),
    'B:rr:A': (tensor([1, 2]), tensor([0, 1]))}
    >>> edges = (torch.tensor([0, 1]), torch.tensor([2, 1]))
    >>> print(gb.add_reverse_edges(edges))
    (tensor([0, 1, 2, 1]), tensor([2, 1, 0, 1]))
    """
    if isinstance(edges, tuple):
        # Homogeneous case: reverse by swapping source/destination tensors.
        u, v = edges
        return (torch.cat([u, v]), torch.cat([v, u]))
    else:
        # Heterogeneous case: operate on a shallow copy so the caller's
        # dictionary is never mutated.
        combined_edges = edges.copy()
        # Robustness fix: the parameter is documented as optional, so a
        # missing mapping means there is nothing to reverse (previously this
        # raised AttributeError on None.items()).
        if reverse_etypes_mapping is None:
            reverse_etypes_mapping = {}
        for etype, reverse_etype in reverse_etypes_mapping.items():
            if etype in edges:
                if reverse_etype in combined_edges:
                    # Reverse type already present: append the swapped edges.
                    u, v = combined_edges[reverse_etype]
                    u = torch.cat([u, edges[etype][1]])
                    v = torch.cat([v, edges[etype][0]])
                    combined_edges[reverse_etype] = (u, v)
                else:
                    combined_edges[reverse_etype] = (
                        edges[etype][1],
                        edges[etype][0],
                    )
        return combined_edges
def exclude_seed_edges(
    minibatch: "MiniBatch",
    include_reverse_edges: bool = False,
    reverse_etypes_mapping: Dict[str, str] = None,
):
    """
    Exclude seed edges with or without their reverse edges from the sampled
    subgraphs in the minibatch.
    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch.
    include_reverse_edges : bool
        Whether the reverse edges of the seed edges should be excluded as
        well. Default is False.
    reverse_etypes_mapping : Dict[str, str] = None
        The mapping from the original edge types to their reverse edge types.
        Only consulted when `include_reverse_edges` is True.
    Returns
    -------
    MiniBatch
        The same minibatch, with `sampled_subgraphs` replaced by subgraphs
        from which the excluded edges have been removed.
    """
    edges_to_exclude = minibatch.node_pairs
    if include_reverse_edges:
        # Extend the exclusion set with the reverse counterparts of the
        # seed edges.
        edges_to_exclude = add_reverse_edges(
            minibatch.node_pairs, reverse_etypes_mapping
        )
    minibatch.sampled_subgraphs = [
        subgraph.exclude_edges(edges_to_exclude)
        for subgraph in minibatch.sampled_subgraphs
    ]
    return minibatch
def unique_and_compact(
......
......@@ -14,9 +14,9 @@ from ..base import dgl_warning
from ..batch import batch as dgl_batch
from ..heterograph import DGLGraph
from .internal import calculate_range
from .itemset import ItemSet, ItemSetDict
from .minibatch import MiniBatch
from .utils import calculate_range
__all__ = ["ItemSampler", "DistributedItemSampler", "minibatcher_default"]
......
......@@ -6,8 +6,8 @@ from typing import Dict
from torch.utils.data import functional_datapipe
from .base import etype_str_to_tuple
from .internal import unique_and_compact
from .minibatch_transformer import MiniBatchTransformer
from .utils import unique_and_compact
__all__ = [
"SubgraphSampler",
......
"""Utility functions for external use."""
from typing import Dict, Tuple, Union
import torch
from .minibatch import MiniBatch
def add_reverse_edges(
    edges: Union[
        Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor],
    ],
    reverse_etypes_mapping: Dict[str, str] = None,
):
    r"""Compose the given edges with their reverse edges.

    For a homogeneous graph (``edges`` given as a pair of tensors), the
    reverse of an edge swaps its source and destination IDs. For a
    heterogeneous graph (``edges`` given as a dict keyed by edge type),
    reversing additionally maps each edge type through
    ``reverse_etypes_mapping``. Intended as a helper before
    ``exclude_edges``.

    Note that the produced reverse edges are not guaranteed to exist in the
    original graph, and duplicates may appear when reverse edges are already
    contained in ``edges``.

    Parameters
    ----------
    edges : Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
                Tuple[torch.Tensor, torch.Tensor]]
        A pair of tensors (homogeneous) or a dictionary mapping edge types
        to the corresponding edges to exclude (heterogeneous).
    reverse_etypes_mapping : Dict[str, str], optional
        The mapping from original edge types to their reverse edge types.

    Returns
    -------
    Union[Dict[str, Tuple[torch.Tensor, torch.Tensor]],
        Tuple[torch.Tensor, torch.Tensor]]
        Node pairs containing both the original edges and their reverse
        counterparts.

    Examples
    --------
    >>> edges = {"A:r:B": (torch.tensor([0, 1]), torch.tensor([1, 2]))}
    >>> print(gb.add_reverse_edges(edges, {"A:r:B": "B:rr:A"}))
    {'A:r:B': (tensor([0, 1]), tensor([1, 2])),
    'B:rr:A': (tensor([1, 2]), tensor([0, 1]))}
    >>> edges = (torch.tensor([0, 1]), torch.tensor([2, 1]))
    >>> print(gb.add_reverse_edges(edges))
    (tensor([0, 1, 2, 1]), tensor([2, 1, 0, 1]))
    """
    if isinstance(edges, tuple):
        # Homogeneous: concatenate the swapped pair onto the original.
        src, dst = edges
        return (torch.cat((src, dst)), torch.cat((dst, src)))
    # Heterogeneous: build the result on a shallow copy of the input dict.
    combined = dict(edges)
    for etype, rev_etype in reverse_etypes_mapping.items():
        if etype not in edges:
            continue
        src, dst = edges[etype]
        if rev_etype in combined:
            # Reverse type already present: append the swapped edges to it.
            prev_src, prev_dst = combined[rev_etype]
            combined[rev_etype] = (
                torch.cat((prev_src, dst)),
                torch.cat((prev_dst, src)),
            )
        else:
            combined[rev_etype] = (dst, src)
    return combined
def exclude_seed_edges(
    minibatch: MiniBatch,
    include_reverse_edges: bool = False,
    reverse_etypes_mapping: Dict[str, str] = None,
):
    """
    Exclude seed edges with or without their reverse edges from the sampled
    subgraphs in the minibatch.
    Parameters
    ----------
    minibatch : MiniBatch
        The minibatch.
    include_reverse_edges : bool
        Whether the reverse edges of the seed edges should be excluded as
        well. Default is False.
    reverse_etypes_mapping : Dict[str, str] = None
        The mapping from the original edge types to their reverse edge types.
        Only used when `include_reverse_edges` is True.
    Returns
    -------
    MiniBatch
        The same minibatch, with `sampled_subgraphs` replaced by subgraphs
        from which the excluded edges have been removed.
    """
    edges_to_exclude = minibatch.node_pairs
    if include_reverse_edges:
        # Extend the exclusion set with the reverse counterparts of the
        # seed edges.
        edges_to_exclude = add_reverse_edges(
            minibatch.node_pairs, reverse_etypes_mapping
        )
    minibatch.sampled_subgraphs = [
        subgraph.exclude_edges(edges_to_exclude)
        for subgraph in minibatch.sampled_subgraphs
    ]
    return minibatch
......@@ -865,7 +865,7 @@ def test_RangeCalculation(params):
sum = 0
for rank in range(num_replicas):
for worker_id in range(max(num_workers, 1)):
result = gb.utils.calculate_range(
result = gb.internal.calculate_range(
True,
total,
num_replicas,
......
import os
import tempfile
import dgl.graphbolt.utils as utils
import dgl.graphbolt.internal as internal
import numpy as np
import pytest
import torch
......@@ -12,7 +12,7 @@ def test_read_torch_data():
save_tensor = torch.tensor([[1, 2, 4], [2, 5, 3]])
file_name = os.path.join(test_dir, "save_tensor.pt")
torch.save(save_tensor, file_name)
read_tensor = utils.internal._read_torch_data(file_name)
read_tensor = internal.utils._read_torch_data(file_name)
assert torch.equal(save_tensor, read_tensor)
save_tensor = read_tensor = None
......@@ -23,7 +23,7 @@ def test_read_numpy_data(in_memory):
save_numpy = np.array([[1, 2, 4], [2, 5, 3]])
file_name = os.path.join(test_dir, "save_numpy.npy")
np.save(file_name, save_numpy)
read_tensor = utils.internal._read_numpy_data(file_name, in_memory)
read_tensor = internal.utils._read_numpy_data(file_name, in_memory)
assert torch.equal(torch.from_numpy(save_numpy), read_tensor)
save_numpy = read_tensor = None
......@@ -38,7 +38,7 @@ def test_read_data(fmt):
np.save(file_name, data)
elif fmt == "torch":
torch.save(torch.from_numpy(data), file_name)
read_tensor = utils.read_data(file_name, fmt)
read_tensor = internal.read_data(file_name, fmt)
assert torch.equal(torch.from_numpy(data), read_tensor)
......@@ -65,9 +65,9 @@ def test_save_data(data_fmt, save_fmt, contiguous):
save_file_name = os.path.join(test_dir, f"save_data.{type_name}")
# Step1. Save the data.
if data_fmt == "torch":
utils.save_data(tensor_data, save_file_name, save_fmt)
internal.save_data(tensor_data, save_file_name, save_fmt)
elif data_fmt == "numpy":
utils.save_data(data, save_file_name, save_fmt)
internal.save_data(data, save_file_name, save_fmt)
# Step2. Load the data.
if save_fmt == "torch":
......@@ -91,11 +91,11 @@ def test_get_npy_dim(fmt):
file_name = os.path.join(test_dir, f"save_data.{type_name}")
if fmt == "numpy":
np.save(file_name, data)
assert utils.get_npy_dim(file_name) == 2
assert internal.get_npy_dim(file_name) == 2
elif fmt == "torch":
torch.save(torch.from_numpy(data), file_name)
with pytest.raises(ValueError):
utils.get_npy_dim(file_name)
internal.get_npy_dim(file_name)
data = None
......@@ -116,7 +116,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature):
torch.save(tensor_data, input_path)
if save_fmt == "torch":
with pytest.raises(AssertionError):
utils.copy_or_convert_data(
internal.copy_or_convert_data(
input_path,
output_path,
data_fmt,
......@@ -124,7 +124,7 @@ def test_copy_or_convert_data(data_fmt, save_fmt, is_feature):
is_feature=is_feature,
)
else:
utils.copy_or_convert_data(
internal.copy_or_convert_data(
input_path,
output_path,
data_fmt,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment