"tests/git@developer.sourcefind.cn:OpenDAS/mmcv.git" did not exist on "f7caa80f9ca385a7b54f78627915cc2171946b01"
Unverified commit 743c3840, authored by Xiangkun Hu and committed by GitHub
Browse files

[Dataset] KarateClubDataset (#1897)



* karateclub dataset

* PPIDataset

* Revert "PPIDataset"

This reverts commit 264bd0c960cfa698a7bb946dad132bf52c2d0c8a.

* Update karate.py

* doc string
Co-authored-by: xiang song(charlie.song) <classicxsong@gmail.com>
parent cb2c4ec1
......@@ -11,12 +11,13 @@ from .reddit import RedditDataset
from .ppi import PPIDataset, LegacyPPIDataset
from .tu import TUDataset, LegacyTUDataset
from .gnn_benckmark import AmazonCoBuy, CoraFull, Coauthor
from .karate import KarateClub
from .karate import KarateClub, KarateClubDataset
from .gindt import GINDataset
from .bitcoinotc import BitcoinOTC
from .gdelt import GDELT
from .icews18 import ICEWS18
from .qm7b import QM7b
from .dgl_dataset import DGLDataset, DGLBuiltinDataset
from .citation_graph import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
......
......@@ -2,30 +2,92 @@
"""
import numpy as np
import networkx as nx
from .. import convert
from .. import backend as F
from .dgl_dataset import DGLDataset
from .utils import deprecate_property
from ..convert import graph as dgl_graph
class KarateClub(object):
"""
Zachary's karate club is a social network of a university karate club, described in the paper
"An Information Flow Model for Conflict and Fission in Small Groups" by Wayne W. Zachary. The
network became a popular example of community structure in networks after its use by Michelle
Girvan and Mark Newman in 2002.
__all__ = ['KarateClubDataset', 'KarateClub']
This dataset has only one graph, with ndata 'label' means whether the node is belong to the "Mr. Hi" club.
"""
class KarateClubDataset(DGLDataset):
r""" Karate Club dataset for Node Classification
.. deprecated:: 0.5.0
`data` is deprecated, it is replaced by:
>>> dataset = KarateClubDataset()
>>> g = dataset[0]
Zachary's karate club is a social network of a university
karate club, described in the paper "An Information Flow
Model for Conflict and Fission in Small Groups" by Wayne W. Zachary.
The network became a popular example of community structure in
networks after its use by Michelle Girvan and Mark Newman in 2002.
Official website: http://konect.cc/networks/ucidata-zachary/
Karate Club dataset statistics:
Nodes: 34
Edges: 156
Number of Classes: 2
Attributes
----------
num_classes : int
Number of node classes
data : list
A list of DGLGraph objects
Examples
--------
>>> dataset = KarateClubDataset()
>>> num_classes = dataset.num_classes
>>> g = dataset[0]
>>> labels = g.ndata['label']
"""
def __init__(self):
kG = nx.karate_club_graph()
self.label = np.asarray(
[kG.nodes[i]['club'] != 'Mr. Hi' for i in kG.nodes]).astype(np.int64)
g = convert.graph(kG)
g.ndata['label'] = self.label
self.data = [g]
super(KarateClubDataset, self).__init__(name='karate_club')
def process(self):
kc_graph = nx.karate_club_graph()
label = np.asarray(
[kc_graph.nodes[i]['club'] != 'Mr. Hi' for i in kc_graph.nodes]).astype(np.int64)
label = F.tensor(label)
g = dgl_graph(kc_graph)
g.ndata['label'] = label
self._graph = g
self._data = [g]
@property
def num_classes(self):
"""Number of classes."""
return 2
@property
def data(self):
deprecate_property('dataset.data', 'dataset[0]')
return self._data
def __getitem__(self, idx):
r""" Get graph object
Parameters
----------
idx : int
Item index, KarateClubDataset has only one graph object
Returns
-------
dgl.DGLGraph
graph structure and labels.
- ndata['label']: ground truth labels
"""
assert idx == 0, "This dataset has only one graph"
return self.data[0]
return self._graph
def __len__(self):
return len(self.data)
r"""The number of graphs in the dataset."""
return 1
KarateClub = KarateClubDataset
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment