"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "c71c19c5e692715dc7a75771936c40201eac9409"
Unverified Commit 43b45ab6 authored by Rhett Ying's avatar Rhett Ying Committed by GitHub
Browse files

[GraphBolt] update URL for ogb-lsc-mag240m (#6392)

parent fab7a950
...@@ -528,23 +528,34 @@ class BuiltinDataset(OnDiskDataset): ...@@ -528,23 +528,34 @@ class BuiltinDataset(OnDiskDataset):
The root directory of the dataset. Default to ``datasets``. The root directory of the dataset. Default to ``datasets``.
""" """
# For datasets smaller than 30GB, we use the base URL.
# Otherwise, we use the accelerated URL.
_base_url = "https://data.dgl.ai/dataset/graphbolt/" _base_url = "https://data.dgl.ai/dataset/graphbolt/"
_accelerated_url = (
"https://dgl-data.s3-accelerate.amazonaws.com/dataset/graphbolt/"
)
_datasets = [ _datasets = [
"ogbn-mag", "ogbn-mag",
"ogbl-citation2", "ogbl-citation2",
"ogbn-products", "ogbn-products",
"ogb-lsc-mag240m",
] ]
_large_datasets = ["ogb-lsc-mag240m"]
_all_datasets = _datasets + _large_datasets
def __init__(self, name: str, root: str = "datasets") -> OnDiskDataset: def __init__(self, name: str, root: str = "datasets") -> OnDiskDataset:
dataset_dir = os.path.join(root, name) dataset_dir = os.path.join(root, name)
if not os.path.exists(dataset_dir): if not os.path.exists(dataset_dir):
if name not in self._datasets: if name not in self._all_datasets:
raise RuntimeError( raise RuntimeError(
f"Dataset {name} is not available. Available datasets are " f"Dataset {name} is not available. Available datasets are "
f"{self._datasets}." f"{self._all_datasets}."
) )
url = self._base_url + name + ".zip" url = (
self._accelerated_url
if name in self._large_datasets
else self._base_url
)
url += name + ".zip"
os.makedirs(root, exist_ok=True) os.makedirs(root, exist_ok=True)
zip_file_path = os.path.join(root, name + ".zip") zip_file_path = os.path.join(root, name + ".zip")
download(url, path=zip_file_path) download(url, path=zip_file_path)
......
...@@ -1702,7 +1702,7 @@ def test_BuiltinDataset(): ...@@ -1702,7 +1702,7 @@ def test_BuiltinDataset():
# Case 1: download from DGL S3 storage. # Case 1: download from DGL S3 storage.
dataset_name = "test-only" dataset_name = "test-only"
# Add test-only dataset to the builtin dataset list for testing only. # Add test-only dataset to the builtin dataset list for testing only.
gb.BuiltinDataset._datasets.append(dataset_name) gb.BuiltinDataset._all_datasets.append(dataset_name)
dataset = gb.BuiltinDataset(name=dataset_name, root=test_dir).load() dataset = gb.BuiltinDataset(name=dataset_name, root=test_dir).load()
assert dataset.graph is not None assert dataset.graph is not None
assert dataset.feature is not None assert dataset.feature is not None
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment