Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
43b45ab6
Unverified
Commit
43b45ab6
authored
Sep 27, 2023
by
Rhett Ying
Committed by
GitHub
Sep 27, 2023
Browse files
[GraphBolt] update URL for ogb-lsc-mag240m (#6392)
parent
fab7a950
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
5 deletions
+16
-5
python/dgl/graphbolt/impl/ondisk_dataset.py
python/dgl/graphbolt/impl/ondisk_dataset.py
+15
-4
tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
+1
-1
No files found.
python/dgl/graphbolt/impl/ondisk_dataset.py
View file @
43b45ab6
...
@@ -528,23 +528,34 @@ class BuiltinDataset(OnDiskDataset):
...
@@ -528,23 +528,34 @@ class BuiltinDataset(OnDiskDataset):
The root directory of the dataset. Default to ``datasets``.
The root directory of the dataset. Default to ``datasets``.
"""
"""
# For dataset that is smaller than 30GB, we use the base url.
# Otherwise, we use the accelerated url.
_base_url
=
"https://data.dgl.ai/dataset/graphbolt/"
_base_url
=
"https://data.dgl.ai/dataset/graphbolt/"
_accelerated_url
=
(
"https://dgl-data.s3-accelerate.amazonaws.com/dataset/graphbolt/"
)
_datasets
=
[
_datasets
=
[
"ogbn-mag"
,
"ogbn-mag"
,
"ogbl-citation2"
,
"ogbl-citation2"
,
"ogbn-products"
,
"ogbn-products"
,
"ogb-lsc-mag240m"
,
]
]
_large_datasets
=
[
"ogb-lsc-mag240m"
]
_all_datasets
=
_datasets
+
_large_datasets
def
__init__
(
self
,
name
:
str
,
root
:
str
=
"datasets"
)
->
OnDiskDataset
:
def
__init__
(
self
,
name
:
str
,
root
:
str
=
"datasets"
)
->
OnDiskDataset
:
dataset_dir
=
os
.
path
.
join
(
root
,
name
)
dataset_dir
=
os
.
path
.
join
(
root
,
name
)
if
not
os
.
path
.
exists
(
dataset_dir
):
if
not
os
.
path
.
exists
(
dataset_dir
):
if
name
not
in
self
.
_datasets
:
if
name
not
in
self
.
_
all_
datasets
:
raise
RuntimeError
(
raise
RuntimeError
(
f
"Dataset
{
name
}
is not available. Available datasets are "
f
"Dataset
{
name
}
is not available. Available datasets are "
f
"
{
self
.
_datasets
}
."
f
"
{
self
.
_
all_
datasets
}
."
)
)
url
=
self
.
_base_url
+
name
+
".zip"
url
=
(
self
.
_accelerated_url
if
name
in
self
.
_large_datasets
else
self
.
_base_url
)
url
+=
name
+
".zip"
os
.
makedirs
(
root
,
exist_ok
=
True
)
os
.
makedirs
(
root
,
exist_ok
=
True
)
zip_file_path
=
os
.
path
.
join
(
root
,
name
+
".zip"
)
zip_file_path
=
os
.
path
.
join
(
root
,
name
+
".zip"
)
download
(
url
,
path
=
zip_file_path
)
download
(
url
,
path
=
zip_file_path
)
...
...
tests/python/pytorch/graphbolt/impl/test_ondisk_dataset.py
View file @
43b45ab6
...
@@ -1702,7 +1702,7 @@ def test_BuiltinDataset():
...
@@ -1702,7 +1702,7 @@ def test_BuiltinDataset():
# Case 1: download from DGL S3 storage.
# Case 1: download from DGL S3 storage.
dataset_name
=
"test-only"
dataset_name
=
"test-only"
# Add test-only dataset to the builtin dataset list for testing only.
# Add test-only dataset to the builtin dataset list for testing only.
gb
.
BuiltinDataset
.
_datasets
.
append
(
dataset_name
)
gb
.
BuiltinDataset
.
_
all_
datasets
.
append
(
dataset_name
)
dataset
=
gb
.
BuiltinDataset
(
name
=
dataset_name
,
root
=
test_dir
).
load
()
dataset
=
gb
.
BuiltinDataset
(
name
=
dataset_name
,
root
=
test_dir
).
load
()
assert
dataset
.
graph
is
not
None
assert
dataset
.
graph
is
not
None
assert
dataset
.
feature
is
not
None
assert
dataset
.
feature
is
not
None
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment