Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
8edcad2d
"src/vscode:/vscode.git/clone" did not exist on "8ca179a0a913deca80f5a1d1f8d31f504cb44f99"
Unverified
Commit
8edcad2d
authored
Oct 08, 2023
by
paoxiaode
Committed by
GitHub
Oct 08, 2023
Browse files
[Dataset] Add VOCSuperpixels dataset in LRGB (#6389)
parent
403dba62
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
331 additions
and
6 deletions
+331
-6
docs/source/api/python/dgl.data.rst
docs/source/api/python/dgl.data.rst
+1
-0
python/dgl/data/__init__.py
python/dgl/data/__init__.py
+5
-1
python/dgl/data/lrgb.py
python/dgl/data/lrgb.py
+308
-5
tests/integration/test_data.py
tests/integration/test_data.py
+17
-0
No files found.
docs/source/api/python/dgl.data.rst
View file @
8edcad2d
...
@@ -70,6 +70,7 @@ Datasets for node classification/regression tasks
...
@@ -70,6 +70,7 @@ Datasets for node classification/regression tasks
MovieLensDataset
MovieLensDataset
PeptidesStructuralDataset
PeptidesStructuralDataset
PeptidesFunctionalDataset
PeptidesFunctionalDataset
VOCSuperpixelsDataset
Edge Prediction Datasets
Edge Prediction Datasets
---------------------------------------
---------------------------------------
...
...
python/dgl/data/__init__.py
View file @
8edcad2d
...
@@ -75,7 +75,11 @@ from .heterophilous_graphs import (
...
@@ -75,7 +75,11 @@ from .heterophilous_graphs import (
# Exception handling was added to prevent crashes for users who are using other
# Exception handling was added to prevent crashes for users who are using other
# datasets.
# datasets.
try
:
try
:
from
.lrgb
import
PeptidesFunctionalDataset
,
PeptidesStructuralDataset
from
.lrgb
import
(
PeptidesFunctionalDataset
,
PeptidesStructuralDataset
,
VOCSuperpixelsDataset
,
)
except
ImportError
:
except
ImportError
:
pass
pass
from
.pattern
import
PATTERNDataset
from
.pattern
import
PATTERNDataset
...
...
python/dgl/data/lrgb.py
View file @
8edcad2d
...
@@ -10,7 +10,14 @@ from .. import backend as F
...
@@ -10,7 +10,14 @@ from .. import backend as F
from
..convert
import
graph
as
dgl_graph
from
..convert
import
graph
as
dgl_graph
from
.dgl_dataset
import
DGLDataset
from
.dgl_dataset
import
DGLDataset
from
.utils
import
download
,
load_graphs
,
save_graphs
,
Subset
from
.utils
import
(
download
,
extract_archive
,
load_graphs
,
makedirs
,
save_graphs
,
Subset
,
)
class
PeptidesStructuralDataset
(
DGLDataset
):
class
PeptidesStructuralDataset
(
DGLDataset
):
...
@@ -48,7 +55,7 @@ class PeptidesStructuralDataset(DGLDataset):
...
@@ -48,7 +55,7 @@ class PeptidesStructuralDataset(DGLDataset):
Parameters
Parameters
----------
----------
raw_dir : str
raw_dir : str
Raw file d
irectory to
download/contains the input data directory
.
D
irectory to
store all the downloaded raw datasets
.
Default: "~/.dgl/".
Default: "~/.dgl/".
force_reload : bool
force_reload : bool
Whether to reload the dataset.
Whether to reload the dataset.
...
@@ -79,6 +86,9 @@ class PeptidesStructuralDataset(DGLDataset):
...
@@ -79,6 +86,9 @@ class PeptidesStructuralDataset(DGLDataset):
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
>>> # accept tensor to be index, but will ignore transform parameter
>>> # get train dataset
>>> split_dict = dataset.get_idx_split()
>>> split_dict = dataset.get_idx_split()
>>> trainset = dataset[split_dict["train"]]
>>> trainset = dataset[split_dict["train"]]
>>> graph, label = trainset[0]
>>> graph, label = trainset[0]
...
@@ -86,6 +96,16 @@ class PeptidesStructuralDataset(DGLDataset):
...
@@ -86,6 +96,16 @@ class PeptidesStructuralDataset(DGLDataset):
Graph(num_nodes=338, num_edges=682,
Graph(num_nodes=338, num_edges=682,
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
>>> # get subset of dataset
>>> import torch
>>> idx = torch.tensor([0, 1, 2])
>>> dataset_subset = dataset[idx]
>>> graph, label = dataset_subset[0]
>>> graph
Graph(num_nodes=119, num_edges=244,
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
"""
"""
def
__init__
(
def
__init__
(
...
@@ -234,7 +254,21 @@ class PeptidesStructuralDataset(DGLDataset):
...
@@ -234,7 +254,21 @@ class PeptidesStructuralDataset(DGLDataset):
return
len
(
self
.
graphs
)
return
len
(
self
.
graphs
)
def
__getitem__
(
self
,
idx
):
def
__getitem__
(
self
,
idx
):
"""Get datapoint with index"""
"""Get the idx-th sample.
Parameters
---------
idx : int or tensor
The sample index, if idx is tensor will ignore transform.
Returns
-------
(:class:`dgl.DGLGraph`, Tensor)
Graph with node feature stored in ``feat`` field and its label.
or
:class:`dgl.data.utils.Subset`
Subset of the dataset at specified indices
"""
if
F
.
is_tensor
(
idx
)
and
idx
.
dim
()
==
1
:
if
F
.
is_tensor
(
idx
)
and
idx
.
dim
()
==
1
:
return
Subset
(
self
,
idx
.
cpu
())
return
Subset
(
self
,
idx
.
cpu
())
...
@@ -271,7 +305,7 @@ class PeptidesFunctionalDataset(DGLDataset):
...
@@ -271,7 +305,7 @@ class PeptidesFunctionalDataset(DGLDataset):
Parameters
Parameters
----------
----------
raw_dir : str
raw_dir : str
Raw file d
irectory to
download/contains the input data directory
.
D
irectory to
store all the downloaded raw datasets
.
Default: "~/.dgl/".
Default: "~/.dgl/".
force_reload : bool
force_reload : bool
Whether to reload the dataset.
Whether to reload the dataset.
...
@@ -302,6 +336,9 @@ class PeptidesFunctionalDataset(DGLDataset):
...
@@ -302,6 +336,9 @@ class PeptidesFunctionalDataset(DGLDataset):
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
>>> # accept tensor to be index, but will ignore transform parameter
>>> # get train dataset
>>> split_dict = dataset.get_idx_split()
>>> split_dict = dataset.get_idx_split()
>>> trainset = dataset[split_dict["train"]]
>>> trainset = dataset[split_dict["train"]]
>>> graph, label = trainset[0]
>>> graph, label = trainset[0]
...
@@ -310,6 +347,15 @@ class PeptidesFunctionalDataset(DGLDataset):
...
@@ -310,6 +347,15 @@ class PeptidesFunctionalDataset(DGLDataset):
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
>>> # get subset of dataset
>>> import torch
>>> idx = torch.tensor([0, 1, 2])
>>> dataset_subset = dataset[idx]
>>> graph, label = dataset_subset[0]
>>> graph
Graph(num_nodes=119, num_edges=244,
ndata_schemes={'feat': Scheme(shape=(9,), dtype=torch.int64)}
edata_schemes={'feat': Scheme(shape=(3,), dtype=torch.int64)})
"""
"""
def
__init__
(
def
__init__
(
...
@@ -442,7 +488,21 @@ class PeptidesFunctionalDataset(DGLDataset):
...
@@ -442,7 +488,21 @@ class PeptidesFunctionalDataset(DGLDataset):
return
len
(
self
.
graphs
)
return
len
(
self
.
graphs
)
def
__getitem__
(
self
,
idx
):
def
__getitem__
(
self
,
idx
):
"""Get datapoint with index"""
"""Get the idx-th sample.
Parameters
---------
idx : int or tensor
The sample index, if idx is tensor will ignore transform.
Returns
-------
(:class:`dgl.DGLGraph`, Tensor)
Graph with node feature stored in ``feat`` field and its label.
or
:class:`dgl.data.utils.Subset`
Subset of the dataset at specified indices
"""
if
F
.
is_tensor
(
idx
)
and
idx
.
dim
()
==
1
:
if
F
.
is_tensor
(
idx
)
and
idx
.
dim
()
==
1
:
return
Subset
(
self
,
idx
.
cpu
())
return
Subset
(
self
,
idx
.
cpu
())
...
@@ -450,3 +510,246 @@ class PeptidesFunctionalDataset(DGLDataset):
...
@@ -450,3 +510,246 @@ class PeptidesFunctionalDataset(DGLDataset):
return
self
.
graphs
[
idx
],
self
.
labels
[
idx
]
return
self
.
graphs
[
idx
],
self
.
labels
[
idx
]
else
:
else
:
return
self
.
_transform
(
self
.
graphs
[
idx
]),
self
.
labels
[
idx
]
return
self
.
_transform
(
self
.
graphs
[
idx
]),
self
.
labels
[
idx
]
class VOCSuperpixelsDataset(DGLDataset):
    r"""VOCSuperpixels dataset for the node classification task.

    DGL dataset of Pascal VOC Superpixels which contains image superpixels
    and a semantic segmentation label for each node superpixel.

    Color map:

        0=background, 1=aeroplane, 2=bicycle, 3=bird, 4=boat, 5=bottle,
        6=bus, 7=car, 8=cat, 9=chair, 10=cow,
        11=diningtable, 12=dog, 13=horse, 14=motorbike, 15=person,
        16=potted plant, 17=sheep, 18=sofa, 19=train, 20=tv/monitor

    Reference `<https://arxiv.org/abs/2206.08164.pdf>`_

    Statistics:

    - Train examples: 8,498
    - Valid examples: 1,428
    - Test examples: 1,429
    - Average number of nodes: 479.40
    - Average number of edges: 2,710.48

    Parameters
    ----------
    raw_dir : str
        Directory to store all the downloaded raw datasets.
        Default: "~/.dgl/".
    split : str
        Should be chosen from ["train", "val", "test"]
        Default: "train".
    construct_format : str, optional
        Option to select the graph construction format.
        Should be chosen from the following formats:

        - "edge_wt_only_coord": the graphs are 8-nn graphs with the edge
          weights computed based on only spatial coordinates of superpixel
          nodes.
        - "edge_wt_coord_feat": the graphs are 8-nn graphs with the edge
          weights computed based on combination of spatial coordinates and
          feature values of superpixel nodes.
        - "edge_wt_region_boundary": the graphs are region boundary graphs
          where two regions (i.e. superpixel nodes) have an edge between
          them if they share a boundary in the original image.

        Default: "edge_wt_region_boundary".
    slic_compactness : int, optional
        Option to select compactness of slic that was used for superpixels
        Should be chosen from [10, 30]
        Default: 30.
    force_reload : bool
        Whether to reload the dataset.
        Default: None (falsy); the value is forwarded unchanged to
        :class:`DGLDataset`.
    verbose : bool
        Whether to print out progress information.
        Default: None (falsy); the value is forwarded unchanged to
        :class:`DGLDataset`.
    transform : callable, optional
        A transform that takes in a :class:`~dgl.DGLGraph` object and returns
        a transformed version. The :class:`~dgl.DGLGraph` object will be
        transformed before every access.

    Examples
    --------
    >>> from dgl.data import VOCSuperpixelsDataset

    >>> train_dataset = VOCSuperpixelsDataset(split="train")
    >>> len(train_dataset)
    8498
    >>> train_dataset.num_classes
    21
    >>> graph = train_dataset[0]
    >>> graph
    Graph(num_nodes=460, num_edges=2632,
        ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int32)}
        edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)})

    >>> # accept tensor to be index, but will ignore transform parameter
    >>> import torch
    >>> idx = torch.tensor([0, 1, 2])
    >>> train_dataset_subset = train_dataset[idx]
    >>> train_dataset_subset[0]
    Graph(num_nodes=460, num_edges=2632,
        ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int32)}
        edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)})
    """

    # Download locations keyed by slic compactness, then construction format.
    # Written as plain (implicitly concatenated) literals so the URL handed to
    # the downloader carries no embedded newlines or indentation.
    urls = {
        10: {
            "edge_wt_only_coord": (
                "https://www.dropbox.com/s/rk6pfnuh7tq3t37/"
                "voc_superpixels_edge_wt_only_coord.zip?dl=1"
            ),
            "edge_wt_coord_feat": (
                "https://www.dropbox.com/s/2a53nmfp6llqg8y/"
                "voc_superpixels_edge_wt_coord_feat.zip?dl=1"
            ),
            "edge_wt_region_boundary": (
                "https://www.dropbox.com/s/6pfz2mccfbkj7r3/"
                "voc_superpixels_edge_wt_region_boundary.zip?dl=1"
            ),
        },
        30: {
            "edge_wt_only_coord": (
                "https://www.dropbox.com/s/toqulkdpb1jrswk/"
                "voc_superpixels_edge_wt_only_coord.zip?dl=1"
            ),
            "edge_wt_coord_feat": (
                "https://www.dropbox.com/s/xywki8ysj63584d/"
                "voc_superpixels_edge_wt_coord_feat.zip?dl=1"
            ),
            "edge_wt_region_boundary": (
                "https://www.dropbox.com/s/8x722ai272wqwl4/"
                "voc_superpixels_edge_wt_region_boundary.zip?dl=1"
            ),
        },
    }

    def __init__(
        self,
        raw_dir=None,
        split="train",
        construct_format="edge_wt_region_boundary",
        slic_compactness=30,
        force_reload=None,
        verbose=None,
        transform=None,
    ):
        # The asserts (and their messages) are kept as-is so that callers
        # which rely on AssertionError for invalid arguments keep working.
        assert split in ["train", "val", "test"], "split not valid."
        assert construct_format in [
            "edge_wt_only_coord",
            "edge_wt_coord_feat",
            "edge_wt_region_boundary",
        ], "construct_format not valid."
        assert slic_compactness in [10, 30], "slic_compactness not valid."

        # These attributes feed the path properties below, so they must be
        # set before the base-class constructor runs.
        self.construct_format = construct_format
        self.slic_compactness = slic_compactness
        self.split = split

        super().__init__(
            name="PascalVOC-SP",
            raw_dir=raw_dir,
            url=self.urls[self.slic_compactness][self.construct_format],
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )

    @property
    def save_path(self):
        r"""Directory holding the files of the selected dataset variant.

        It is ``<raw_path>/slic_compactness_<N>/<construct_format>``.
        """
        return os.path.join(
            self.raw_path,
            "slic_compactness_" + str(self.slic_compactness),
            self.construct_format,
        )

    @property
    def raw_data_path(self):
        r"""Path of the raw pickle file of the selected split."""
        return os.path.join(self.save_path, f"{self.split}.pickle")

    @property
    def graph_path(self):
        r"""Path of the processed (cached) graphs of the selected split."""
        return os.path.join(self.save_path, f"processed_{self.split}.pkl")

    @property
    def num_classes(self):
        r"""Number of classes for each node."""
        return 21

    def __len__(self):
        r"""The number of examples in the dataset."""
        return len(self.graphs)

    def download(self):
        r"""Download the archive, extract it and move it to ``save_path``.

        The zip file is fetched into ``raw_path``, extracted there, the
        extracted ``voc_superpixels_<construct_format>`` directory is renamed
        to ``save_path`` and the zip file is removed afterwards.
        """
        archive_stem = "voc_superpixels_" + self.construct_format
        zip_file_path = os.path.join(self.raw_path, archive_stem + ".zip")
        path = download(self.url, path=zip_file_path)
        extract_archive(path, self.raw_path, overwrite=True)
        # Only the parent of ``save_path`` is created up front: ``os.rename``
        # onto an already existing directory always fails on Windows, while
        # renaming onto a missing destination works on every platform.
        makedirs(os.path.dirname(self.save_path))
        os.rename(os.path.join(self.raw_path, archive_stem), self.save_path)
        os.unlink(path)

    def process(self):
        r"""Build one DGL graph per raw sample of the selected split.

        The result is stored in ``self.graphs``.
        """
        # NOTE: ``pickle.load`` can execute arbitrary code; the raw file must
        # come from a trusted source (here: the archive fetched by
        # ``download``).
        with open(self.raw_data_path, "rb") as f:
            raw_samples = pickle.load(f)

        self.graphs = []
        for raw in tqdm(raw_samples, desc=f"Processing {self.split} dataset"):
            # Each raw sample is a tuple (x, edge_attr, edge_index, y)
            #     Shape of x : [num_nodes, 14]
            #     Shape of edge_attr : [num_edges, 1] or [num_edges, 2]
            #     Shape of edge_index : [2, num_edges]
            #     Shape of y : [num_nodes]
            node_feat, edge_feat = raw[0], raw[1]
            edge_index, node_label = raw[2], raw[3]

            voc_graph = dgl_graph(
                (edge_index[0], edge_index[1]),
                num_nodes=len(node_label),
            )
            voc_graph.ndata["feat"] = node_feat.to(F.float32)
            voc_graph.edata["feat"] = edge_feat.to(F.float32)
            voc_graph.ndata["label"] = F.tensor(node_label)
            self.graphs.append(voc_graph)

    def load(self):
        r"""Load the processed graphs from ``graph_path``."""
        # NOTE: ``pickle.load`` can execute arbitrary code; only load cache
        # files that were written by ``save``.
        with open(self.graph_path, "rb") as f:
            self.graphs = pickle.load(f)

    def save(self):
        r"""Pickle the processed graphs to ``graph_path``."""
        with open(self.graph_path, "wb") as f:
            pickle.dump(self.graphs, f)

    def has_cache(self):
        r"""Return whether the processed graph file already exists."""
        return os.path.exists(self.graph_path)

    def __getitem__(self, idx):
        r"""Get the idx-th sample.

        Parameters
        ----------
        idx : int or tensor
            The sample index, if idx is tensor will ignore transform.

        Returns
        -------
        :class:`dgl.DGLGraph`
            graph structure, node features, node labels and edge features.

            - ``ndata['feat']``: node features
            - ``ndata['label']``: node labels
            - ``edata['feat']``: edge features

        or

        :class:`dgl.data.utils.Subset`
            Subset of the dataset at specified indices
        """
        # A 1-D index tensor selects a subset; the index is moved to CPU.
        if F.is_tensor(idx) and idx.dim() == 1:
            return Subset(self, idx.cpu())

        if self._transform is None:
            return self.graphs[idx]
        return self._transform(self.graphs[idx])
tests/integration/test_data.py
View file @
8edcad2d
...
@@ -90,6 +90,23 @@ def test_peptides_functional():
...
@@ -90,6 +90,23 @@ def test_peptides_functional():
assert
dataset1
.
num_classes
==
label
.
shape
[
0
]
assert
dataset1
.
num_classes
==
label
.
shape
[
0
]
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_VOC_superpixels():
    """Check that the dataset applies ``transform`` on item access.

    The first graph is fetched once without and once with an ``AddSelfLoop``
    transform; the transformed graph must have exactly one extra edge per
    node.
    """
    self_loop = dgl.AddSelfLoop(allow_duplicate=True)

    plain_dataset = data.VOCSuperpixelsDataset()
    plain_graph = plain_dataset[0]

    looped_dataset = data.VOCSuperpixelsDataset(transform=self_loop)
    looped_graph = looped_dataset[0]

    # One self-loop per node is expected on top of the original edges.
    assert (
        looped_graph.num_edges()
        == plain_graph.num_edges() + plain_graph.num_nodes()
    )
@
unittest
.
skipIf
(
@
unittest
.
skipIf
(
F
.
_default_context_str
==
"gpu"
,
F
.
_default_context_str
==
"gpu"
,
reason
=
"Datasets don't need to be tested on GPU."
,
reason
=
"Datasets don't need to be tested on GPU."
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment