"src/git@developer.sourcefind.cn:OpenDAS/dgl.git" did not exist on "c3aea1b62c1fa23029b2c9c077ee089fc663dcde"
Unverified Commit a244de57 authored by Mufei Li, committed by GitHub
Browse files

[Dataset] WisconsinDataset (#5520)


Co-authored-by: Ubuntu <ubuntu@ip-172-31-36-188.ap-northeast-1.compute.internal>
parent e8579d8b
...@@ -61,6 +61,7 @@ Datasets for node classification/regression tasks ...@@ -61,6 +61,7 @@ Datasets for node classification/regression tasks
ActorDataset ActorDataset
CornellDataset CornellDataset
TexasDataset TexasDataset
WisconsinDataset
Edge Prediction Datasets Edge Prediction Datasets
--------------------------------------- ---------------------------------------
......
...@@ -59,6 +59,7 @@ from .geom_gcn import ( ...@@ -59,6 +59,7 @@ from .geom_gcn import (
CornellDataset, CornellDataset,
SquirrelDataset, SquirrelDataset,
TexasDataset, TexasDataset,
WisconsinDataset,
) )
from .pattern import PATTERNDataset from .pattern import PATTERNDataset
from .wikics import WikiCSDataset from .wikics import WikiCSDataset
......
...@@ -405,3 +405,79 @@ class TexasDataset(GeomGCNDataset): ...@@ -405,3 +405,79 @@ class TexasDataset(GeomGCNDataset):
verbose=verbose, verbose=verbose,
transform=transform, transform=transform,
) )
class WisconsinDataset(GeomGCNDataset):
    r"""Wisconsin subset of
    `WebKB <http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-11/www/wwkb/>`__,
    in the version later modified by `Geom-GCN: Geometric Graph Convolutional
    Networks <https://arxiv.org/abs/2002.05287>`__

    Every node is a web page and every edge is a hyperlink between two pages.
    The node features are bag-of-words representations of the pages. Each
    page was manually assigned to one of five categories: student, project,
    course, staff, and faculty.

    Statistics:

    - Nodes: 251
    - Edges: 515
    - Number of Classes: 5
    - 10 train/val/test splits

        - Train: 120
        - Val: 80
        - Test: 51

    Parameters
    ----------
    raw_dir : str, optional
        Raw file directory to store the processed data. Default: ~/.dgl/
    force_reload : bool, optional
        Whether to re-download the data source. Default: False
    verbose : bool, optional
        Whether to print progress information. Default: True
    transform : callable, optional
        A callable that receives a :class:`~dgl.DGLGraph` object and returns
        a transformed version of it. The :class:`~dgl.DGLGraph` object is
        transformed before every access. Default: None

    Attributes
    ----------
    num_classes : int
        Number of node classes

    Notes
    -----
    The graph does not come with edges for both directions.

    Examples
    --------

    >>> from dgl.data import WisconsinDataset
    >>> dataset = WisconsinDataset()
    >>> g = dataset[0]
    >>> num_classes = dataset.num_classes

    >>> # get node features
    >>> feat = g.ndata["feat"]

    >>> # get data split
    >>> train_mask = g.ndata["train_mask"]
    >>> val_mask = g.ndata["val_mask"]
    >>> test_mask = g.ndata["test_mask"]

    >>> # get labels
    >>> label = g.ndata['label']
    """

    def __init__(
        self, raw_dir=None, force_reload=False, verbose=True, transform=None
    ):
        # Forward every caller-supplied option unchanged to the base class;
        # only the dataset name is fixed here.
        super().__init__(
            name="wisconsin",
            raw_dir=raw_dir,
            force_reload=force_reload,
            verbose=verbose,
            transform=transform,
        )
...@@ -71,3 +71,20 @@ def test_texas(): ...@@ -71,3 +71,20 @@ def test_texas():
assert g.num_edges() == 325 assert g.num_edges() == 325
g2 = dgl.data.TexasDataset(force_reload=True, transform=transform)[0] g2 = dgl.data.TexasDataset(force_reload=True, transform=transform)[0]
assert g2.num_edges() - g.num_edges() == g.num_nodes() assert g2.num_edges() - g.num_edges() == g.num_nodes()
@unittest.skipIf(
    F._default_context_str == "gpu",
    reason="Datasets don't need to be tested on GPU.",
)
@unittest.skipIf(
    dgl.backend.backend_name != "pytorch", reason="only supports pytorch"
)
def test_wisconsin():
    """Check WisconsinDataset graph sizes, with and without a transform."""
    # Plain load: the graph must match the documented node/edge counts.
    plain = dgl.data.WisconsinDataset(force_reload=True)[0]
    assert plain.num_nodes() == 251
    assert plain.num_edges() == 515

    # With duplicates allowed, the self-loop transform is expected to add
    # exactly one edge per node on top of the original edges.
    add_loops = dgl.AddSelfLoop(allow_duplicate=True)
    looped = dgl.data.WisconsinDataset(
        force_reload=True, transform=add_loops
    )[0]
    assert looped.num_edges() == plain.num_edges() + plain.num_nodes()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment