Unverified Commit 8f99b131 authored by PengZhang's avatar PengZhang Committed by GitHub
Browse files

feature: add a parse parameter degree_as_nlabel for pytorch-gin demo (#3676)

* feature: add a parse parameter degree_as_nlabel for pytorch-gin demo

* fix some typo

* [fix]: allow to benchmark all of the 9 dataset.

* [Feature] add epoch number to log

* [Feature]: simply list the command lines for all datasets (https://github.com/dmlc/dgl/pull/3676#discussion_r790270705) and run a test.

* Update README.md
Co-authored-by: Ubuntu <ubuntu@ip-172-31-10-175.ap-northeast-1.compute.internal>
Co-authored-by: Mufei Li <mufeili1996@gmail.com>
parent 8f5baa15
...@@ -28,18 +28,56 @@ An experiment on the GIN in customized settings can be run with ...@@ -28,18 +28,56 @@ An experiment on the GIN in customized settings can be run with
python main.py [--device 0 | --disable-cuda] --dataset COLLAB \ python main.py [--device 0 | --disable-cuda] --dataset COLLAB \
--graph_pooling_type max --neighbor_pooling_type sum --graph_pooling_type max --neighbor_pooling_type sum
``` ```
Add `--degree_as_nlabel` to use one-hot encodings of node degrees as node feature vectors.
Results Results
------- -------
Run with following with the double SUM pooling way: Results may **fluctuate** due to random factors and the relatively small dataset. If you want to follow the paper's setting, consider the script below.
(tested dataset: "MUTAG"(default), "COLLAB", "IMDBBINARY", "IMDBMULTI")
```bash ```bash
# 4 bioinformatics datasets setting graph_pooling_type=sum, the nodes have categorical input features
python main.py --dataset MUTAG --device 0 \ python main.py --dataset MUTAG --device 0 \
--graph_pooling_type sum --neighbor_pooling_type sum --graph_pooling_type sum --neighbor_pooling_type sum --filename MUTAG.txt
python main.py --dataset PTC --device 0 \
--graph_pooling_type sum --neighbor_pooling_type sum --filename PTC.txt
python main.py --dataset NCI1 --device 0 \
--graph_pooling_type sum --neighbor_pooling_type sum --filename NCI1.txt
python main.py --dataset PROTEINS --device 0 \
--graph_pooling_type sum --neighbor_pooling_type sum --filename PROTEINS.txt
# 5 social network datasets setting graph_pooling_type=mean, for the REDDIT datasets, we set all node feature vectors to be the same
# (thus, features here are uninformative); for the other social networks, we use one-hot encodings of node degrees.
python main.py --dataset COLLAB --device 0 \
--graph_pooling_type mean --neighbor_pooling_type sum --degree_as_nlabel --filename COLLAB.txt
python main.py --dataset IMDBBINARY --device 0 \
--graph_pooling_type mean --neighbor_pooling_type sum --degree_as_nlabel --filename IMDBBINARY.txt
python main.py --dataset IMDBMULTI --device 0 \
--graph_pooling_type mean --neighbor_pooling_type sum --degree_as_nlabel --filename IMDBMULTI.txt
python main.py --dataset REDDITBINARY --device 0 \
--graph_pooling_type mean --neighbor_pooling_type sum --filename REDDITBINARY.txt --fold_idx 6 --epoch 120
python main.py --dataset REDDITMULTI5K --device 0 \
--graph_pooling_type mean --neighbor_pooling_type sum --filename REDDITMULTI5K.txt
``` ```
* MUTAG: 0.85 (paper: ~0.89) Results from one of the 10 folds are shown below.
* COLLAB: 0.89 (paper: ~0.80)
* IMDBBINARY: 0.76 (paper: ~0.75) | dataset | our result | paper report |
* IMDBMULTI: 0.51 (paper: ~0.52) | ------------- | ---------- | ------------ |
| MUTAG | 89.4 | 89.4 ± 5.6 |
| PTC | 68.5 | 64.6 ± 7.0 |
| NCI1 | 78.5 | 82.7 ± 1.7 |
| PROTEINS | 72.3 | 76.2 ± 2.8 |
| COLLAB | 81.6 | 80.2 ± 1.9 |
| IMDBBINARY | 73.0 | 75.1 ± 5.1 |
| IMDBMULTI | 54.0 | 52.3 ± 2.8 |
| REDDITBINARY | 88.0 | 92.4 ± 2.5 |
| REDDITMULTI5K | 54.8 | 57.5 ± 1.5 |
...@@ -19,7 +19,7 @@ class Parser(): ...@@ -19,7 +19,7 @@ class Parser():
# dataset # dataset
self.parser.add_argument( self.parser.add_argument(
'--dataset', type=str, default="MUTAG", '--dataset', type=str, default="MUTAG",
choices=['MUTAG', 'COLLAB', 'IMDBBINARY', 'IMDBMULTI'], choices=['MUTAG', 'COLLAB', 'IMDBBINARY', 'IMDBMULTI', 'NCI1', 'PROTEINS', 'PTC', 'REDDITBINARY', 'REDDITMULTI5K'],
help='name of dataset (default: MUTAG)') help='name of dataset (default: MUTAG)')
self.parser.add_argument( self.parser.add_argument(
'--batch_size', type=int, default=32, '--batch_size', type=int, default=32,
...@@ -30,6 +30,9 @@ class Parser(): ...@@ -30,6 +30,9 @@ class Parser():
self.parser.add_argument( self.parser.add_argument(
'--filename', type=str, default="", '--filename', type=str, default="",
help='output file') help='output file')
self.parser.add_argument(
'--degree_as_nlabel', action="store_true",
help='use one-hot encodings of node degrees as node feature vectors')
# device # device
self.parser.add_argument( self.parser.add_argument(
......
...@@ -86,8 +86,7 @@ def main(args): ...@@ -86,8 +86,7 @@ def main(args):
else: else:
args.device = torch.device("cpu") args.device = torch.device("cpu")
dataset = GINDataset(args.dataset, not args.learn_eps) dataset = GINDataset(args.dataset, not args.learn_eps, args.degree_as_nlabel)
trainloader, validloader = GINDataLoader( trainloader, validloader = GINDataLoader(
dataset, batch_size=args.batch_size, device=args.device, dataset, batch_size=args.batch_size, device=args.device,
seed=args.seed, shuffle=True, seed=args.seed, shuffle=True,
...@@ -129,11 +128,12 @@ def main(args): ...@@ -129,11 +128,12 @@ def main(args):
if not args.filename == "": if not args.filename == "":
with open(args.filename, 'a') as f: with open(args.filename, 'a') as f:
f.write('%s %s %s %s' % ( f.write('%s %s %s %s %s' % (
args.dataset, args.dataset,
args.learn_eps, args.learn_eps,
args.neighbor_pooling_type, args.neighbor_pooling_type,
args.graph_pooling_type args.graph_pooling_type,
epoch
)) ))
f.write("\n") f.write("\n")
f.write("%f %f %f %f" % ( f.write("%f %f %f %f" % (
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment