Unverified commit 9ed28089, authored by Rhett Ying, committed by GitHub
Browse files

[GraphBolt] add available builtin datasets into docstring (#6346)

parent e3266c43
......@@ -482,6 +482,29 @@ class BuiltinDataset(OnDiskDataset):
This class is used to help download datasets from DGL S3 storage and load
them as ``OnDiskDataset``.
Available builtin datasets include:
**ogbn-mag**
The ogbn-mag dataset is a heterogeneous network composed of a subset of
the Microsoft Academic Graph (MAG). See more details in
`ogbn-mag <https://ogb.stanford.edu/docs/nodeprop/#ogbn-mag>`_.
.. note::
Reverse edges are added to the original graph and duplicate
edges are removed.
**ogbl-citation2**
The ogbl-citation2 dataset is a directed graph, representing the
citation network between a subset of papers extracted from MAG. See
more details in `ogbl-citation2
<https://ogb.stanford.edu/docs/linkprop/#ogbl-citation2>`_.
**ogbn-products**
The ogbn-products dataset is an undirected and unweighted graph,
representing an Amazon product co-purchasing network. See more details
in `ogbn-products
<https://ogb.stanford.edu/docs/nodeprop/#ogbn-products>`_.
.. note::
Reverse edges are added to the original graph.
Parameters
----------
name : str
......@@ -491,10 +514,16 @@ class BuiltinDataset(OnDiskDataset):
"""
_base_url = "https://data.dgl.ai/dataset/graphbolt/"
_datasets = ["ogbn-mag", "ogbl-citation2", "ogbn-products"]
def __init__(self, name: str, root: str = "datasets") -> OnDiskDataset:
dataset_dir = os.path.join(root, name)
if not os.path.exists(dataset_dir):
if name not in self._datasets:
raise RuntimeError(
f"Dataset {name} is not available. Available datasets are "
f"{self._datasets}."
)
url = self._base_url + name + ".zip"
os.makedirs(root, exist_ok=True)
zip_file_path = os.path.join(root, name + ".zip")
......
......@@ -1701,6 +1701,8 @@ def test_BuiltinDataset():
with tempfile.TemporaryDirectory() as test_dir:
# Case 1: download from DGL S3 storage.
dataset_name = "test-only"
# Add test-only dataset to the builtin dataset list for testing only.
gb.BuiltinDataset._datasets.append(dataset_name)
dataset = gb.BuiltinDataset(name=dataset_name, root=test_dir).load()
assert dataset.graph is not None
assert dataset.feature is not None
......@@ -1713,3 +1715,11 @@ def test_BuiltinDataset():
assert dataset.feature is not None
assert dataset.tasks is not None
assert dataset.dataset_name == dataset_name
# Case 3: dataset is not available.
dataset_name = "fake_name"
with pytest.raises(
RuntimeError,
match=rf"Dataset {dataset_name} is not available.*",
):
_ = gb.BuiltinDataset(name=dataset_name, root=test_dir).load()
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment