Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
dgl
Commits
e594b4a8
Unverified
Commit
e594b4a8
authored
Oct 12, 2023
by
paoxiaode
Committed by
GitHub
Oct 12, 2023
Browse files
[Dataset] add COCOsuperpixel dataset (#6407)
parent
fc06d7fc
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
301 additions
and
0 deletions
+301
-0
python/dgl/data/__init__.py
python/dgl/data/__init__.py
+1
-0
python/dgl/data/lrgb.py
python/dgl/data/lrgb.py
+283
-0
tests/integration/test_data.py
tests/integration/test_data.py
+17
-0
No files found.
python/dgl/data/__init__.py
View file @
e594b4a8
...
@@ -76,6 +76,7 @@ from .heterophilous_graphs import (
...
@@ -76,6 +76,7 @@ from .heterophilous_graphs import (
# datasets.
# datasets.
try
:
try
:
from
.lrgb
import
(
from
.lrgb
import
(
COCOSuperpixelsDataset
,
PeptidesFunctionalDataset
,
PeptidesFunctionalDataset
,
PeptidesStructuralDataset
,
PeptidesStructuralDataset
,
VOCSuperpixelsDataset
,
VOCSuperpixelsDataset
,
...
...
python/dgl/data/lrgb.py
View file @
e594b4a8
...
@@ -797,3 +797,286 @@ class VOCSuperpixelsDataset(DGLDataset):
...
@@ -797,3 +797,286 @@ class VOCSuperpixelsDataset(DGLDataset):
return
self
.
graphs
[
idx
]
return
self
.
graphs
[
idx
]
return
self
.
_transform
(
self
.
graphs
[
idx
])
return
self
.
_transform
(
self
.
graphs
[
idx
])
class
COCOSuperpixelsDataset
(
DGLDataset
):
r
"""COCO superpixel dataset for the node classification task.
DGL dataset of COCO-SP in the LRGB benchmark which contains image
superpixels and a semantic segmentation label for each node superpixel.
Based on the COCO 2017 dataset. Original source `<https://cocodataset.org>`_
Reference `<https://arxiv.org/abs/2206.08164>`_
Statistics:
- Train examples: 113,286
- Valid examples: 5,000
- Test examples: 5,000
- Average number of nodes: 476.88
- Average number of edges: 2,710.48
- Number of node classes: 81
Parameters
----------
raw_dir : str
Directory to store all the downloaded raw datasets.
Default: "~/.dgl/".
split : str
Should be chosen from ["train", "val", "test"]
Default: "train".
construct_format : str, optional
Option to select the graph construction format.
Should be chosen from the following formats:
- "edge_wt_only_coord": the graphs are 8-nn graphs with the edge weights
computed based on only spatial coordinates of superpixel nodes.
- "edge_wt_coord_feat": the graphs are 8-nn graphs with the edge weights
computed based on combination of spatial coordinates and feature
values of superpixel nodes.
- "edge_wt_region_boundary": the graphs are region boundary graphs where two
regions (i.e. superpixel nodes) have an edge between them if they
share a boundary in the original image.
Default: "edge_wt_region_boundary".
slic_compactness : int, optional
Option to select the SLIC compactness that was used to generate the superpixels.
Should be chosen from [10, 30]
Default: 30.
force_reload : bool
Whether to reload the dataset.
Default: False.
verbose : bool
Whether to print out progress information.
Default: False.
transform : callable, optional
A transform that takes in a :class:`~dgl.DGLGraph` object and returns
a transformed version. The :class:`~dgl.DGLGraph` object will be
transformed before every access.
Examples
---------
>>> from dgl.data import COCOSuperpixelsDataset
>>> train_dataset = COCOSuperpixelsDataset(split="train")
>>> len(train_dataset)
113286
>>> train_dataset.num_classes
81
>>> graph = train_dataset[0]
>>> graph
Graph(num_nodes=488, num_edges=2766,
ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32),
'label': Scheme(shape=(), dtype=torch.uint8)}
edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)})
>>> # a 1-D tensor can be used as the index when transform is None
>>> # see details in __getitem__ function
>>> import torch
>>> idx = torch.tensor([0, 1, 2])
>>> train_dataset_subset = train_dataset[idx]
>>> train_dataset_subset[0]
Graph(num_nodes=488, num_edges=2766,
ndata_schemes={'feat': Scheme(shape=(14,), dtype=torch.float32),
'label': Scheme(shape=(), dtype=torch.uint8)}
edata_schemes={'feat': Scheme(shape=(2,), dtype=torch.float32)})
"""
# Download locations, keyed first by SLIC compactness and then by graph
# construction format. Each value is a plain single-line string: the previous
# triple-quoted literals carried leading/trailing newlines and indentation
# inside the URL itself.
urls = {
    10: {
        "edge_wt_only_coord": "https://www.dropbox.com/s/prqizdep8gk0ndk/coco_superpixels_edge_wt_only_coord.zip?dl=1",
        "edge_wt_coord_feat": "https://www.dropbox.com/s/zftoyln1pkcshcg/coco_superpixels_edge_wt_coord_feat.zip?dl=1",
        "edge_wt_region_boundary": "https://www.dropbox.com/s/fhihfcyx2y978u8/coco_superpixels_edge_wt_region_boundary.zip?dl=1",
    },
    30: {
        "edge_wt_only_coord": "https://www.dropbox.com/s/hrbfkxmc5z9lsaz/coco_superpixels_edge_wt_only_coord.zip?dl=1",
        "edge_wt_coord_feat": "https://www.dropbox.com/s/4rfa2d5ij1gfu9b/coco_superpixels_edge_wt_coord_feat.zip?dl=1",
        "edge_wt_region_boundary": "https://www.dropbox.com/s/r6ihg1f4pmyjjy0/coco_superpixels_edge_wt_region_boundary.zip?dl=1",
    },
}
def __init__(
    self,
    raw_dir=None,
    split="train",
    construct_format="edge_wt_region_boundary",
    slic_compactness=30,
    force_reload=None,
    verbose=None,
    transform=None,
):
    r"""Validate the options, record them and delegate to the base class.

    Parameters
    ----------
    raw_dir : str, optional
        Forwarded unchanged to the base-class initialiser.
    split : str
        One of ``"train"``, ``"val"`` or ``"test"``.
    construct_format : str
        One of ``"edge_wt_only_coord"``, ``"edge_wt_coord_feat"`` or
        ``"edge_wt_region_boundary"``.
    slic_compactness : int
        Either ``10`` or ``30``.
    force_reload, verbose, transform
        Forwarded unchanged to the base-class initialiser.

    Raises
    ------
    AssertionError
        If ``split``, ``construct_format`` or ``slic_compactness`` is not
        one of the accepted values.
    """
    valid_splits = ["train", "val", "test"]
    valid_formats = [
        "edge_wt_only_coord",
        "edge_wt_coord_feat",
        "edge_wt_region_boundary",
    ]
    valid_compactness = [10, 30]

    assert split in valid_splits, "split not valid."
    assert construct_format in valid_formats, "construct_format not valid."
    assert slic_compactness in valid_compactness, "slic_compactness not valid."

    # These attributes must exist before the base-class initialiser runs:
    # the download URL below is selected from them.
    self.split = split
    self.slic_compactness = slic_compactness
    self.construct_format = construct_format
    self.graphs = []

    selected_url = self.urls[self.slic_compactness][self.construct_format]
    super().__init__(
        name="COCO-SP",
        raw_dir=raw_dir,
        url=selected_url,
        force_reload=force_reload,
        verbose=verbose,
        transform=transform,
    )
@property
def save_path(self):
    r"""Directory for this compactness/format combination.

    Built as ``<raw_path>/slic_compactness_<slic_compactness>/<construct_format>``.
    """
    compactness_dir = "slic_compactness_{}".format(self.slic_compactness)
    return os.path.join(self.raw_path, compactness_dir, self.construct_format)
@property
def raw_data_path(self):
    r"""Location of the raw pickle file for the current split.

    This is ``<save_path>/<split>.pickle``.
    """
    pickle_name = "{}.pickle".format(self.split)
    return os.path.join(self.save_path, pickle_name)
@property
def graph_path(self):
    r"""Location of the processed (cached) file for the current split.

    This is ``<save_path>/processed_<split>.pkl``.
    """
    cache_name = "processed_{}.pkl".format(self.split)
    return os.path.join(self.save_path, cache_name)
@property
def num_classes(self):
    r"""Number of distinct node classes (a fixed value of 81)."""
    return 81
def __len__(self):
    r"""Return how many graphs are currently held in ``self.graphs``."""
    graph_count = len(self.graphs)
    return graph_count
def download(self):
    r"""Fetch the zip archive, unpack it and move the result to ``save_path``.

    Steps, in order: download ``self.url`` to
    ``<raw_path>/coco_superpixels_<construct_format>.zip``, extract it into
    ``raw_path``, create ``save_path``, rename the extracted
    ``coco_superpixels_<construct_format>`` directory onto ``save_path`` and
    finally delete the zip file.
    """
    archive_stem = "coco_superpixels_" + self.construct_format
    zip_file_path = os.path.join(self.raw_path, archive_stem + ".zip")

    # The value returned by the download helper is used as the archive
    # location for extraction and for the final clean-up.
    path = download(self.url, path=zip_file_path, overwrite=True)
    extract_archive(path, self.raw_path, overwrite=True)

    makedirs(self.save_path)
    extracted_dir = os.path.join(self.raw_path, archive_stem)
    os.rename(extracted_dir, self.save_path)

    os.unlink(path)
def label_remap(self):
    r"""Build the mapping from original label ids to contiguous indices.

    The original label ids have gaps, so each id is mapped to its position
    in the ordered list below.

    Returns
    -------
    dict
        ``{original_label_id: contiguous_index}`` with indices starting at 0.
    """
    # fmt: off
    original_label_idx = [
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20,
        21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
        41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58,
        59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
        80, 81, 82, 84, 85, 86, 87, 88, 89, 90
    ]
    # fmt: on
    return {
        original_id: position
        for position, original_id in enumerate(original_label_idx)
    }
def process(self):
    r"""Convert the raw samples of the current split into graphs.

    Loads the pickle file at ``self.raw_data_path``, builds one graph per
    raw sample with ``dgl_graph``, attaches node features, edge features
    and remapped node labels, and appends each graph to ``self.graphs``.
    """
    # NOTE(review): unpickling can execute arbitrary code. The file read
    # here originates from the downloaded archive, so it is only as
    # trustworthy as the download source.
    with open(self.raw_data_path, "rb") as file:
        raw_graphs = pickle.load(file)

    label_map = self.label_remap()

    for raw_graph in tqdm(
        raw_graphs, desc=f"Processing {self.split} dataset"
    ):
        # Each raw sample is a tuple (x, edge_attr, edge_index, y)
        #   Shape of x          : [num_nodes, 14]
        #   Shape of edge_attr  : [num_edges, 1] or [num_edges, 2]
        #   Shape of edge_index : [2, num_edges]
        #   Shape of y          : [num_nodes]
        node_feat = raw_graph[0]
        edge_feat = raw_graph[1]
        edge_index = raw_graph[2]
        raw_labels = raw_graph[3]

        g = dgl_graph(
            (edge_index[0], edge_index[1]),
            num_nodes=len(raw_labels),
        )
        g.ndata["feat"] = node_feat.to(F.float32)
        g.edata["feat"] = edge_feat.to(F.float32)

        y = F.tensor(raw_labels)

        # Label remapping. See self.label_remap() func
        for i, label in enumerate(y):
            y[i] = label_map[label.item()]

        g.ndata["label"] = y
        self.graphs.append(g)
def load(self):
    r"""Read the pickled graph list at ``self.graph_path`` into ``self.graphs``."""
    with open(self.graph_path, "rb") as cache_file:
        self.graphs = pickle.load(cache_file)
def save(self):
    r"""Pickle ``self.graphs`` to the file at ``self.graph_path``."""
    # ``graph_path`` is already a complete path, so it is opened directly
    # rather than being passed through a single-argument ``os.path.join``.
    with open(self.graph_path, "wb") as file:
        pickle.dump(self.graphs, file)
def has_cache(self):
    r"""Return ``True`` when something exists at ``self.graph_path``."""
    cache_location = self.graph_path
    return os.path.exists(cache_location)
def __getitem__(self, idx):
    r"""Return the sample(s) selected by ``idx``.

    Parameters
    ----------
    idx : int or tensor
        Index of the sample. A 1-D tensor is accepted only when no
        transform is set.

    Returns
    -------
    :class:`dgl.DGLGraph`
        The graph stored at ``idx`` (passed through the transform when one
        is set), carrying

        - ``ndata['feat']``: node features
        - ``ndata['label']``: node labels
        - ``edata['feat']``: edge features

    or

    :class:`dgl.data.utils.Subset`
        A subset of the dataset at the given indices, when ``idx`` is a
        1-D tensor.

    Raises
    ------
    ValueError
        If ``idx`` is a 1-D tensor while a transform is set.
    """
    is_index_tensor = F.is_tensor(idx) and idx.dim() == 1
    if is_index_tensor:
        if self._transform is not None:
            raise ValueError(
                "Tensor idx not supported when transform is not None."
            )
        return Subset(self, idx.cpu())

    transform = self._transform
    if transform is not None:
        return transform(self.graphs[idx])
    return self.graphs[idx]
tests/integration/test_data.py
View file @
e594b4a8
...
@@ -107,6 +107,23 @@ def test_VOC_superpixels():
...
@@ -107,6 +107,23 @@ def test_VOC_superpixels():
assert
g2
.
num_edges
()
-
g1
.
num_edges
()
==
g1
.
num_nodes
()
assert
g2
.
num_edges
()
-
g1
.
num_edges
()
==
g1
.
num_nodes
()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_COCO_superpixels():
    """Check that the ``transform`` argument is applied on item access.

    With ``AddSelfLoop(allow_duplicate=True)`` the transformed first graph
    is expected to have exactly one more edge per node than the plain one.
    """
    add_self_loop = dgl.AddSelfLoop(allow_duplicate=True)

    plain_dataset = data.COCOSuperpixelsDataset()
    plain_graph = plain_dataset[0]

    looped_dataset = data.COCOSuperpixelsDataset(transform=add_self_loop)
    looped_graph = looped_dataset[0]

    assert (
        looped_graph.num_edges() - plain_graph.num_edges()
        == plain_graph.num_nodes()
    )
@
unittest
.
skipIf
(
@
unittest
.
skipIf
(
F
.
_default_context_str
==
"gpu"
,
F
.
_default_context_str
==
"gpu"
,
reason
=
"Datasets don't need to be tested on GPU."
,
reason
=
"Datasets don't need to be tested on GPU."
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment