Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
2668d62f
Unverified
Commit
2668d62f
authored
Jul 02, 2023
by
Rhett Ying
Committed by
GitHub
Jul 02, 2023
Browse files
[GraphBolt] add abstract Dataset (#5926)
parent
1cbe0b27
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
67 additions
and
0 deletions
+67
-0
python/dgl/graphbolt/__init__.py
python/dgl/graphbolt/__init__.py
+1
-0
python/dgl/graphbolt/dataset.py
python/dgl/graphbolt/dataset.py
+50
-0
tests/python/pytorch/graphbolt/test_dataset.py
tests/python/pytorch/graphbolt/test_dataset.py
+16
-0
No files found.
python/dgl/graphbolt/__init__.py
View file @
2668d62f
...
...
@@ -9,6 +9,7 @@ from .graph_storage import *
from
.itemset
import
*
from
.minibatch_sampler
import
*
from
.feature_store
import
*
from
.dataset
import
*
from
.subgraph_sampler
import
*
...
...
python/dgl/graphbolt/dataset.py
0 → 100644
View file @
2668d62f
"""GraphBolt Dataset."""
from typing import Union

from .feature_store import FeatureStore
from .itemset import ItemSet, ItemSetDict
__all__
=
[
"Dataset"
]
class Dataset:
    """An abstract dataset.

    Dataset provides abstraction for accessing the data required for training.
    The data abstraction could be a native CPU memory block, a shared memory
    block, a file handle of an opened file on disk, a service that provides
    the API to access the data, etc. There are 3 primary components in the
    dataset: *Train-Validation-Test Set*, *Feature Storage*, *Graph Topology*.

    *Train-Validation-Test Set*:
    The training-validation-testing (TVT) set which is used to train the neural
    networks. We calculate the embeddings based on their respective features
    and the graph structure, and then utilize the embeddings to optimize the
    neural network parameters.

    *Feature Storage*:
    A key-value store which stores node/edge/graph features.

    *Graph Topology*:
    Graph topology is used by the subgraph sampling algorithm to
    generate a subgraph.

    Every accessor on this base class raises ``NotImplementedError``.
    """

    # NOTE: the item-set accessors are annotated with ``Union[...]`` rather
    # than ``ItemSet or ItemSetDict``. The latter is an ordinary boolean
    # expression evaluated at definition time and collapses to ``ItemSet``,
    # silently dropping ``ItemSetDict`` from the annotation.

    def train_set(self) -> Union[ItemSet, ItemSetDict]:
        """Return the training set.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def validation_set(self) -> Union[ItemSet, ItemSetDict]:
        """Return the validation set.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def test_set(self) -> Union[ItemSet, ItemSetDict]:
        """Return the test set.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def graph(self) -> object:
        """Return the graph.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError

    def feature(self) -> FeatureStore:
        """Return the feature.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        raise NotImplementedError
tests/python/pytorch/graphbolt/test_dataset.py
0 → 100644
View file @
2668d62f
import
pytest
from
dgl
import
graphbolt
as
gb
def test_Dataset():
    """Check that every accessor of the base ``gb.Dataset`` is unimplemented."""
    base = gb.Dataset()
    # Same order as the original stanzas: train, validation, test, graph,
    # feature. Each call must raise NotImplementedError on the base class.
    accessors = (
        base.train_set,
        base.validation_set,
        base.test_set,
        base.graph,
        base.feature,
    )
    for accessor in accessors:
        with pytest.raises(NotImplementedError):
            _ = accessor()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment